[clang] [llvm] [AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate (PR #127043)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 07:23:23 PDT 2025
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/127043
From 27e42e14212adfd8c6025c0c9464e95df5ec879e Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 13 Feb 2025 11:07:24 +0000
Subject: [PATCH] [AARCH64][Neon] switch to using bitcasts in arm_neon.h where
appropriate
---
clang/include/clang/Basic/TargetBuiltins.h | 4 +
clang/include/clang/Basic/arm_neon.td | 68 +-
clang/lib/CodeGen/CodeGenFunction.h | 8 +-
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 102 +-
.../CodeGen/AArch64/bf16-dotprod-intrinsics.c | 384 +-
.../CodeGen/AArch64/bf16-getset-intrinsics.c | 30 +-
.../AArch64/bf16-reinterpret-intrinsics.c | 452 +-
.../fp8-intrinsics/acle_neon_fp8_cvt.c | 44 +-
.../fp8-intrinsics/acle_neon_fp8_fdot.c | 84 +-
.../fp8-intrinsics/acle_neon_fp8_fmla.c | 84 +-
.../acle_neon_fp8_reinterpret.c | 158 +-
clang/test/CodeGen/AArch64/neon-2velem.c | 1826 +-
clang/test/CodeGen/AArch64/neon-extract.c | 373 +-
clang/test/CodeGen/AArch64/neon-fma.c | 146 +-
clang/test/CodeGen/AArch64/neon-fp16fml.c | 1426 +-
.../AArch64/neon-intrinsics-constrained.c | 1862 +-
clang/test/CodeGen/AArch64/neon-intrinsics.c | 26255 ++++++++-----
.../CodeGen/AArch64/neon-ldst-one-rcpc3.c | 40 +-
clang/test/CodeGen/AArch64/neon-ldst-one.c | 8535 ++---
.../CodeGen/AArch64/neon-misc-constrained.c | 111 +-
clang/test/CodeGen/AArch64/neon-misc.c | 4130 +-
clang/test/CodeGen/AArch64/neon-perm.c | 2877 +-
.../neon-scalar-x-indexed-elem-constrained.c | 308 +-
.../AArch64/neon-scalar-x-indexed-elem.c | 653 +-
clang/test/CodeGen/AArch64/neon-vcmla.c | 1314 +-
clang/test/CodeGen/AArch64/poly-add.c | 2 +-
clang/test/CodeGen/AArch64/poly128.c | 56 +-
clang/test/CodeGen/AArch64/poly64.c | 781 +-
.../CodeGen/AArch64/v8.1a-neon-intrinsics.c | 98 +-
.../v8.2a-neon-intrinsics-constrained.c | 902 +-
.../AArch64/v8.2a-neon-intrinsics-generic.c | 288 +-
.../CodeGen/AArch64/v8.2a-neon-intrinsics.c | 1184 +-
.../AArch64/v8.5a-neon-frint3264-intrinsic.c | 251 +-
.../CodeGen/AArch64/v8.6a-neon-intrinsics.c | 232 +-
.../CodeGen/arm-bf16-dotprod-intrinsics.c | 386 +-
.../test/CodeGen/arm-bf16-getset-intrinsics.c | 32 +-
.../test/CodeGen/arm-neon-directed-rounding.c | 347 +-
clang/test/CodeGen/arm-neon-fma.c | 66 +-
clang/test/CodeGen/arm-neon-numeric-maxmin.c | 62 +-
clang/test/CodeGen/arm-neon-vcvtX.c | 114 +-
clang/test/CodeGen/arm-neon-vst.c | 4138 +-
clang/test/CodeGen/arm64-vrnd-constrained.c | 219 +-
clang/test/CodeGen/arm64-vrnd.c | 121 +-
clang/test/CodeGen/arm64_vcreate.c | 21 +-
clang/test/CodeGen/arm64_vdupq_n_f64.c | 96 +-
clang/test/CodeGen/arm_neon_intrinsics.c | 31749 ++++++++++------
clang/utils/TableGen/NeonEmitter.cpp | 28 +-
.../v8.2a-neon-intrinsics-constrained.ll | 276 +
48 files changed, 56329 insertions(+), 36394 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.ll
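
Two threads run through this patch. First, the reinterpret paths in the
generated arm_neon.h stop going through memory (the __REINT allocas visible
in the old CHECK lines below) and the `cast` operator in arm_neon.td is
replaced with `bitcast`, so the header produces straight bitcast chains.
Second, since the element type is now known when the builtin is emitted,
codegen passes a single, already-chosen comparison predicate instead of an
FP/integer pair. A minimal standalone C++ sketch of the header-side
difference, assuming clang's vector extensions (the helper names are
illustrative, not the generated header):

#include <cstdint>
#include <cstring>

typedef float   float32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t   __attribute__((vector_size(8)));

// Old pattern: pun through memory; later passes must scrub the resulting
// allocas (hence the sroa pass added to the test RUN lines below).
static int32x2_t reinterpret_via_memory(float32x2_t v) {
  int32x2_t r;
  std::memcpy(&r, &v, sizeof r);
  return r;
}

// New pattern: a value-level bit reinterpretation, which lowers directly to
// an IR bitcast with no temporary.
static int32x2_t reinterpret_via_bitcast(float32x2_t v) {
  return __builtin_bit_cast(int32x2_t, v);
}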
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4781054240b5b..c1ba65064f159 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -263,6 +263,10 @@ namespace clang {
EltType ET = getEltType();
return ET == Poly8 || ET == Poly16 || ET == Poly64;
}
+ bool isFloatingPoint() const {
+ EltType ET = getEltType();
+ return ET == Float16 || ET == Float32 || ET == Float64 || ET == BFloat16;
+ }
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
bool isQuad() const { return (Flags & QuadFlag) != 0; }
unsigned getEltSizeInBits() const {
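
The new accessor mirrors isPoly()/isUnsigned() above, so codegen can choose
between an FP and an integer comparison predicate from the type flags alone
instead of inspecting previously emitted IR (see the ARM.cpp changes below).
A standalone analog of that selection, with assumed enum names purely for
illustration:

#include <cassert>

enum class EltType { SInt32, Float32 };
enum class Predicate { ICMP_EQ, FCMP_OEQ };

// Analog of NeonTypeFlags::isFloatingPoint() driving the predicate choice
// that the vceqz codegen makes.
static bool isFloatingPoint(EltType ET) { return ET == EltType::Float32; }

static Predicate pickEqPredicate(EltType ET) {
  return isFloatingPoint(ET) ? Predicate::FCMP_OEQ : Predicate::ICMP_EQ;
}

int main() {
  assert(pickEqPredicate(EltType::Float32) == Predicate::FCMP_OEQ);
  assert(pickEqPredicate(EltType::SInt32) == Predicate::ICMP_EQ);
  return 0;
}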
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 3e73dd054933f..ab0051efe5159 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -31,8 +31,8 @@ def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
(call "vget_high", $p1))>;
def OP_MULLHi_P64 : Op<(call "vmull",
- (cast "poly64_t", (call "vget_high", $p0)),
- (cast "poly64_t", (call "vget_high", $p1)))>;
+ (bitcast "poly64_t", (call "vget_high", $p0)),
+ (bitcast "poly64_t", (call "vget_high", $p1)))>;
def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
(call "vget_high", $p2))>;
@@ -95,11 +95,11 @@ def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
(decimate (rotl mask1, 1), 2)))>;
-def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
-def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
-def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
-def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
-def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
+def OP_EQ : Op<(bitcast "R", (op "==", $p0, $p1))>;
+def OP_GE : Op<(bitcast "R", (op ">=", $p0, $p1))>;
+def OP_LE : Op<(bitcast "R", (op "<=", $p0, $p1))>;
+def OP_GT : Op<(bitcast "R", (op ">", $p0, $p1))>;
+def OP_LT : Op<(bitcast "R", (op "<", $p0, $p1))>;
def OP_NEG : Op<(op "-", $p0)>;
def OP_NOT : Op<(op "~", $p0)>;
def OP_AND : Op<(op "&", $p0, $p1)>;
@@ -108,20 +108,20 @@ def OP_XOR : Op<(op "^", $p0, $p1)>;
def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
def OP_CAST : LOp<[(save_temp $promote, $p0),
- (cast "R", $promote)]>;
+ (bitcast "R", $promote)]>;
def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
def OP_DUP : Op<(dup $p0)>;
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
-def OP_SEL : Op<(cast "R", (op "|",
- (op "&", $p0, (cast $p0, $p1)),
- (op "&", (op "~", $p0), (cast $p0, $p2))))>;
+def OP_SEL : Op<(bitcast "R", (op "|",
+ (op "&", $p0, (bitcast $p0, $p1)),
+ (op "&", (op "~", $p0), (bitcast $p0, $p2))))>;
def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
-def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
+def OP_SQXTUN : Op<(call "vcombine", (bitcast $p0, "U", $p0),
(call "vqmovun", $p1))>;
def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
@@ -129,12 +129,12 @@ def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
-def OP_REINT : Op<(cast "R", $p0)>;
+def OP_REINT : Op<(bitcast "R", $p0)>;
def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
-def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
+def OP_ABDL : Op<(bitcast "R", (call "vmovl", (bitcast $p0, "U",
(call "vabd", $p0, $p1))))>;
def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
(call "vget_high", $p1))>;
@@ -152,15 +152,15 @@ def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
(call "vget_high", $p2))>;
def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
def OP_DIV : Op<(op "/", $p0, $p1)>;
-def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
+def OP_LONG_HI : Op<(bitcast "R", (call (name_replace "_high_", "_"),
(call "vget_high", $p0), $p1))>;
-def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
- (cast "R", "H", $p0),
- (cast "R", "H",
+def OP_NARROW_HI : Op<(bitcast "R", (call "vcombine",
+ (bitcast "R", "H", $p0),
+ (bitcast "R", "H",
(call (name_replace "_high_", "_"),
$p1, $p2))))>;
def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
- (cast "R",
+ (bitcast "R",
(call "vshll_n", $a1, (literal "int32_t", "0")))]>;
def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
@@ -221,18 +221,18 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
def OP_USDOT_LN
: Op<(call "vusdot", $p0, $p1,
- (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
+ (bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
def OP_USDOT_LNQ
: Op<(call "vusdot", $p0, $p1,
- (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
+ (bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
// sudot splats the second vector and then calls vusdot
def OP_SUDOT_LN
: Op<(call "vusdot", $p0,
- (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
+ (bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
def OP_SUDOT_LNQ
: Op<(call "vusdot", $p0,
- (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
+ (bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
def OP_BFDOT_LN
: Op<(call "vbfdot", $p0, $p1,
@@ -263,7 +263,7 @@ def OP_VCVT_BF16_F32_A32
: Op<(call "__a32_vcvt_bf16", $p0)>;
def OP_VCVT_BF16_F32_LO_A32
- : Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
+ : Op<(call "vcombine", (bitcast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
(call "__a32_vcvt_bf16", $p0))>;
def OP_VCVT_BF16_F32_HI_A32
: Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
@@ -924,12 +924,12 @@ def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
-def CMEQ : SInst<"vceqz", "U.",
+def CMEQ : SInst<"vceqz", "U(.!)",
"csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
-def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
-def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
-def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
-def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
+def CMGE : SInst<"vcgez", "U(.!)", "csilfdQcQsQiQlQfQd">;
+def CMLE : SInst<"vclez", "U(.!)", "csilfdQcQsQiQlQfQd">;
+def CMGT : SInst<"vcgtz", "U(.!)", "csilfdQcQsQiQlQfQd">;
+def CMLT : SInst<"vcltz", "U(.!)", "csilfdQcQsQiQlQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Max/Min Integer
@@ -1667,11 +1667,11 @@ let TargetGuard = "fullfp16,neon" in {
// ARMv8.2-A FP16 one-operand vector intrinsics.
// Comparison
- def CMEQH : SInst<"vceqz", "U.", "hQh">;
- def CMGEH : SInst<"vcgez", "U.", "hQh">;
- def CMGTH : SInst<"vcgtz", "U.", "hQh">;
- def CMLEH : SInst<"vclez", "U.", "hQh">;
- def CMLTH : SInst<"vcltz", "U.", "hQh">;
+ def CMEQH : SInst<"vceqz", "U(.!)", "hQh">;
+ def CMGEH : SInst<"vcgez", "U(.!)", "hQh">;
+ def CMGTH : SInst<"vcgtz", "U(.!)", "hQh">;
+ def CMLEH : SInst<"vclez", "U(.!)", "hQh">;
+ def CMLTH : SInst<"vcltz", "U(.!)", "hQh">;
// Vector conversion
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;
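
Most of the .td changes above swap the `cast` operator (a C-style conversion
in the generated header) for `bitcast` (a reinterpretation of the same bits).
OP_SEL is representative: the select mask has to combine with the raw bit
pattern of the value operands, so both are reinterpreted to the mask's
integer type before the and/or. A runnable C++ reference for those semantics,
assuming clang's vector extensions (the function name is illustrative):

#include <cstdint>
#include <cstdio>

typedef uint32_t uint32x2_t  __attribute__((vector_size(8)));
typedef float    float32x2_t __attribute__((vector_size(8)));

// Reference semantics of OP_SEL (vbsl_f32): bits of a where the mask is set,
// bits of b where it is clear, all on raw bit patterns.
static float32x2_t vbsl_f32_ref(uint32x2_t mask, float32x2_t a, float32x2_t b) {
  uint32x2_t ai = __builtin_bit_cast(uint32x2_t, a); // bit reinterpret, not a value cast
  uint32x2_t bi = __builtin_bit_cast(uint32x2_t, b);
  return __builtin_bit_cast(float32x2_t, (mask & ai) | (~mask & bi));
}

int main() {
  uint32x2_t m = {0xFFFFFFFFu, 0u};
  float32x2_t a = {1.0f, 1.0f}, b = {2.0f, 2.0f};
  float32x2_t r = vbsl_f32_ref(m, a, b);
  std::printf("%g %g\n", r[0], r[1]); // prints "1 2"
}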
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ca00a0e8c6cf4..dd73d3b3a75f3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4694,10 +4694,10 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
- const llvm::CmpInst::Predicate Fp,
- const llvm::CmpInst::Predicate Ip,
- const llvm::Twine &Name = "");
+ llvm::Value *
+ EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
+ const llvm::CmpInst::Predicate Pred,
+ const llvm::Twine &Name = "");
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 63b3d17f97f85..afe25b5418424 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1750,8 +1750,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(NeonTypeConst->getZExtValue());
- bool Usgn = Type.isUnsigned();
- bool Quad = Type.isQuad();
+ const bool Usgn = Type.isUnsigned();
+ const bool Quad = Type.isQuad();
+ const bool Floating = Type.isFloatingPoint();
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
const bool AllowBFloatArgsAndRet =
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
@@ -1852,24 +1853,28 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vceqz_v:
case NEON::BI__builtin_neon_vceqzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
- ICmpInst::ICMP_EQ, "vceqz");
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
case NEON::BI__builtin_neon_vcgez_v:
case NEON::BI__builtin_neon_vcgezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
- ICmpInst::ICMP_SGE, "vcgez");
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
+ "vcgez");
case NEON::BI__builtin_neon_vclez_v:
case NEON::BI__builtin_neon_vclezq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
- ICmpInst::ICMP_SLE, "vclez");
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
+ "vclez");
case NEON::BI__builtin_neon_vcgtz_v:
case NEON::BI__builtin_neon_vcgtzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
- ICmpInst::ICMP_SGT, "vcgtz");
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
+ "vcgtz");
case NEON::BI__builtin_neon_vcltz_v:
case NEON::BI__builtin_neon_vcltzq_v:
- return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
- ICmpInst::ICMP_SLT, "vcltz");
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
+ "vcltz");
case NEON::BI__builtin_neon_vclz_v:
case NEON::BI__builtin_neon_vclzq_v:
// We generate target-independent intrinsic, which needs a second argument
@@ -2432,28 +2437,32 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return Builder.CreateBitCast(Result, ResultType, NameHint);
}
-Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
- Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
- const CmpInst::Predicate Ip, const Twine &Name) {
- llvm::Type *OTy = Op->getType();
-
- // FIXME: this is utterly horrific. We should not be looking at previous
- // codegen context to find out what needs doing. Unfortunately TableGen
- // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
- // (etc).
- if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
- OTy = BI->getOperand(0)->getType();
-
- Op = Builder.CreateBitCast(Op, OTy);
- if (OTy->getScalarType()->isFloatingPointTy()) {
- if (Fp == CmpInst::FCMP_OEQ)
- Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
+Value *
+CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
+ const CmpInst::Predicate Pred,
+ const Twine &Name) {
+
+ if (isa<FixedVectorType>(Ty)) {
+    // Vector operands arrive as generic i8 vectors; recover the original type.
+ Op = Builder.CreateBitCast(Op, Ty);
+ }
+
+ if (CmpInst::isFPPredicate(Pred)) {
+ if (Pred == CmpInst::FCMP_OEQ)
+ Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
else
- Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
+ Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
} else {
- Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
+ Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
}
- return Builder.CreateSExt(Op, Ty, Name);
+
+ llvm::Type *ResTy = Ty;
+ if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
+ ResTy = FixedVectorType::get(
+ IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
+ VTy->getNumElements());
+
+ return Builder.CreateSExt(Op, ResTy, Name);
}
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
@@ -5955,45 +5964,66 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vceqzd_s64:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType(getContext())),
+ ICmpInst::ICMP_EQ, "vceqz");
case NEON::BI__builtin_neon_vceqzd_f64:
case NEON::BI__builtin_neon_vceqzs_f32:
case NEON::BI__builtin_neon_vceqzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
+ ICmpInst::FCMP_OEQ, "vceqz");
case NEON::BI__builtin_neon_vcgezd_s64:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType(getContext())),
+ ICmpInst::ICMP_SGE, "vcgez");
case NEON::BI__builtin_neon_vcgezd_f64:
case NEON::BI__builtin_neon_vcgezs_f32:
case NEON::BI__builtin_neon_vcgezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
+ ICmpInst::FCMP_OGE, "vcgez");
case NEON::BI__builtin_neon_vclezd_s64:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType(getContext())),
+ ICmpInst::ICMP_SLE, "vclez");
case NEON::BI__builtin_neon_vclezd_f64:
case NEON::BI__builtin_neon_vclezs_f32:
case NEON::BI__builtin_neon_vclezh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
+ ICmpInst::FCMP_OLE, "vclez");
case NEON::BI__builtin_neon_vcgtzd_s64:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType(getContext())),
+ ICmpInst::ICMP_SGT, "vcgtz");
case NEON::BI__builtin_neon_vcgtzd_f64:
case NEON::BI__builtin_neon_vcgtzs_f32:
case NEON::BI__builtin_neon_vcgtzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
+ ICmpInst::FCMP_OGT, "vcgtz");
case NEON::BI__builtin_neon_vcltzd_s64:
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ return EmitAArch64CompareBuiltinExpr(
+ Ops[0], ConvertType(E->getCallReturnType(getContext())),
+ ICmpInst::ICMP_SLT, "vcltz");
+
case NEON::BI__builtin_neon_vcltzd_f64:
case NEON::BI__builtin_neon_vcltzs_f32:
case NEON::BI__builtin_neon_vcltzh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
- ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
+ ICmpInst::FCMP_OLT, "vcltz");
case NEON::BI__builtin_neon_vceqzd_u64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
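
With the changes above, the helper no longer peeks at a prior BitCastInst to
guess whether the operand "was" floating point (the FIXME it replaces): the
caller picks the predicate, the operand is bitcast back from its generic
i8-vector form to its real type, and the i1 compare result is sign-extended
to a 0/-1 mask of the element width. A C++ reference model of that final
shape, assuming clang's vector extensions:

#include <cstdint>
#include <cstdio>

typedef float   float32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t   __attribute__((vector_size(8)));

// Reference semantics of vceqz_f32: ordered-equal compare against zero,
// widened per lane to all-zeros/all-ones. The vector extension's == already
// yields the sext-of-i1 mask that the helper builds explicitly in IR.
static int32x2_t vceqz_f32_ref(float32x2_t a) {
  float32x2_t zero = {0.0f, 0.0f};
  return a == zero;
}

int main() {
  float32x2_t v = {0.0f, 1.0f};
  int32x2_t m = vceqz_f32_ref(v);
  std::printf("%d %d\n", m[0], m[1]); // prints "-1 0"
}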
diff --git a/clang/test/CodeGen/AArch64/bf16-dotprod-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-dotprod-intrinsics.c
index 877d83c0fa395..2097495b3baee 100644
--- a/clang/test/CodeGen/AArch64/bf16-dotprod-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-dotprod-intrinsics.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
-// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -8,10 +8,16 @@
// CHECK-LABEL: @test_vbfdot_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b) {
@@ -20,10 +26,16 @@ float32x2_t test_vbfdot_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b) {
// CHECK-LABEL: @test_vbfdotq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b){
@@ -32,19 +44,24 @@ float32x4_t test_vbfdotq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b){
// CHECK-LABEL: @test_vbfdot_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_128:%.*]] = alloca <4 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_128:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_128]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_128]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_128]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_128]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
@@ -53,19 +70,24 @@ float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
// CHECK-LABEL: @test_vbfdotq_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_130:%.*]] = alloca <8 x bfloat>, align 16
-// CHECK-NEXT: [[__REINT1_130:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_130]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_130]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_130]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_130]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP4]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x bfloat> [[TMP6]] to <8 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
@@ -74,19 +96,24 @@ float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b
// CHECK-LABEL: @test_vbfdot_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_132:%.*]] = alloca <8 x bfloat>, align 16
-// CHECK-NEXT: [[__REINT1_132:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_132]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_132]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_132]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_132]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP4]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b) {
@@ -95,19 +122,24 @@ float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
// CHECK-LABEL: @test_vbfdotq_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_126:%.*]] = alloca <4 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_126:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_126]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_126]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_126]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_126]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x bfloat> [[TMP6]] to <8 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
@@ -116,12 +148,20 @@ float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
// CHECK-LABEL: @test_vbfmmlaq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmmla(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMMLAQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMMLAQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMMLAQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmmla(<4 x float> [[VBFMMLAQ_F32_I]], <8 x bfloat> [[VBFMMLAQ_F321_I]], <8 x bfloat> [[VBFMMLAQ_F322_I]])
// CHECK-NEXT: [[VBFMMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMMLAQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMMLAQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMMLAQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmmlaq_f32(r, a, b);
@@ -129,12 +169,20 @@ float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-LABEL: @test_vbfmlalbq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlalbq_f32(r, a, b);
@@ -142,12 +190,20 @@ float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-LABEL: @test_vbfmlaltq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlaltq_f32(r, a, b);
@@ -157,26 +213,34 @@ float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
return vbfmlalbq_lane_f32(r, a, b, 0);
@@ -186,26 +250,34 @@ float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGETQ_LANE18]], i32 4
-// CHECK-NEXT: [[VGETQ_LANE23:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGETQ_LANE23]], i32 5
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[VGETQ_LANE22:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGETQ_LANE22]], i32 4
// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGETQ_LANE28]], i32 6
-// CHECK-NEXT: [[VGETQ_LANE33:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGETQ_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGETQ_LANE28]], i32 5
+// CHECK-NEXT: [[VGETQ_LANE34:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGETQ_LANE34]], i32 6
+// CHECK-NEXT: [[VGETQ_LANE40:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGETQ_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlalbq_laneq_f32(r, a, b, 3);
@@ -215,26 +287,34 @@ float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
return vbfmlaltq_lane_f32(r, a, b, 0);
@@ -244,26 +324,34 @@ float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGETQ_LANE18]], i32 4
-// CHECK-NEXT: [[VGETQ_LANE23:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGETQ_LANE23]], i32 5
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[VGETQ_LANE22:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGETQ_LANE22]], i32 4
// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGETQ_LANE28]], i32 6
-// CHECK-NEXT: [[VGETQ_LANE33:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGETQ_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGETQ_LANE28]], i32 5
+// CHECK-NEXT: [[VGETQ_LANE34:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGETQ_LANE34]], i32 6
+// CHECK-NEXT: [[VGETQ_LANE40:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGETQ_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlaltq_laneq_f32(r, a, b, 3);
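The checks above reflect the new lowering: each vector operand is first bitcast to its same-width integer-lane type, then to <16 x i8>, and back to its semantic type before the @llvm.aarch64.neon.bfmlalt call. A minimal source-level caller that exercises this path, assuming an AArch64 target with +neon and +bf16:

#include <arm_neon.h>

// vbfmlaltq_laneq_f32 multiply-accumulates the odd ("top") bf16 lanes of a
// against a broadcast of lane idx of b, widening to f32; idx must be a
// compile-time constant in [0, 7].
float32x4_t mlalt_top_lane(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
  return vbfmlaltq_laneq_f32(r, a, b, 7);
}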
diff --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
index 9da2cd5af3221..d54e56697f8b8 100644
--- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
-// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -45,9 +45,10 @@ bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
// CHECK-LABEL: @test_vdup_lane_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
//
bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
@@ -56,9 +57,10 @@ bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
// CHECK-LABEL: @test_vdupq_lane_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP2]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
//
bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
@@ -67,9 +69,10 @@ bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
// CHECK-LABEL: @test_vdup_laneq_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
//
bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
@@ -78,9 +81,10 @@ bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
// CHECK-LABEL: @test_vdupq_laneq_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
//
bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
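These dup-lane checks show the same pattern on the input side only: the bf16 source is laundered through <4 x i16> or <8 x i16> before the lane shuffle. A tiny caller, again assuming +bf16:

#include <arm_neon.h>

// Broadcasts lane 7 of a 128-bit bf16 vector to all eight result lanes; the
// lane index must be a compile-time constant in [0, 7].
bfloat16x8_t splat_top_lane(bfloat16x8_t v) {
  return vdupq_laneq_bf16(v, 7);
}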
diff --git a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
index 2b271ac88462b..88f2305e2782c 100644
--- a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c
@@ -1,333 +1,413 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vreinterpret_bf16_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
-// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT: ret <4 x bfloat> [[TMP1]]
//
bfloat16x4_t test_vreinterpret_bf16_s8(int8x8_t a) { return vreinterpret_bf16_s8(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_s16(int16x4_t a) { return vreinterpret_bf16_s16(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_s32(int32x2_t a) { return vreinterpret_bf16_s32(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <4 x bfloat>
-// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT: ret <4 x bfloat> [[TMP1]]
//
bfloat16x4_t test_vreinterpret_bf16_f32(float32x2_t a) { return vreinterpret_bf16_f32(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
-// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT: ret <4 x bfloat> [[TMP1]]
//
bfloat16x4_t test_vreinterpret_bf16_u8(uint8x8_t a) { return vreinterpret_bf16_u8(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_u16(uint16x4_t a) { return vreinterpret_bf16_u16(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_u32(uint32x2_t a) { return vreinterpret_bf16_u32(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A:%.*]] to <4 x bfloat>
-// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT: ret <4 x bfloat> [[TMP1]]
//
bfloat16x4_t test_vreinterpret_bf16_p8(poly8x8_t a) { return vreinterpret_bf16_p8(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_p16(poly16x4_t a) { return vreinterpret_bf16_p16(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_u64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_u64(uint64x1_t a) { return vreinterpret_bf16_u64(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_s64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_s64(int64x1_t a) { return vreinterpret_bf16_s64(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
-// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT: ret <8 x bfloat> [[TMP1]]
//
bfloat16x8_t test_vreinterpretq_bf16_s8(int8x16_t a) { return vreinterpretq_bf16_s8(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_s16(int16x8_t a) { return vreinterpretq_bf16_s16(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_s32(int32x4_t a) { return vreinterpretq_bf16_s32(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x bfloat>
-// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT: ret <8 x bfloat> [[TMP1]]
//
bfloat16x8_t test_vreinterpretq_bf16_f32(float32x4_t a) { return vreinterpretq_bf16_f32(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
-// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT: ret <8 x bfloat> [[TMP1]]
//
bfloat16x8_t test_vreinterpretq_bf16_u8(uint8x16_t a) { return vreinterpretq_bf16_u8(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_u16(uint16x8_t a) { return vreinterpretq_bf16_u16(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_u32(uint32x4_t a) { return vreinterpretq_bf16_u32(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_p8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x bfloat>
-// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT: ret <8 x bfloat> [[TMP1]]
//
bfloat16x8_t test_vreinterpretq_bf16_p8(poly8x16_t a) { return vreinterpretq_bf16_p8(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_p16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_p16(poly16x8_t a) { return vreinterpretq_bf16_p16(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_u64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_u64(uint64x2_t a) { return vreinterpretq_bf16_u64(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_s64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_s64(int64x2_t a) { return vreinterpretq_bf16_s64(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <4 x bfloat>
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x bfloat>
// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
//
bfloat16x4_t test_vreinterpret_bf16_p64(poly64x1_t a) { return vreinterpret_bf16_p64(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_p64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_p64(poly64x2_t a) { return vreinterpretq_bf16_p64(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_p128(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <8 x bfloat>
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_p128(
+// CHECK-SAME: i128 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x bfloat>
// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
//
bfloat16x8_t test_vreinterpretq_bf16_p128(poly128_t a) { return vreinterpretq_bf16_p128(a); }
-// CHECK-LABEL: @test_vreinterpret_bf16_f64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <4 x bfloat>
-// CHECK-NEXT: ret <4 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x bfloat> @test_vreinterpret_bf16_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <4 x bfloat>
+// CHECK-NEXT: ret <4 x bfloat> [[TMP1]]
//
bfloat16x4_t test_vreinterpret_bf16_f64(float64x1_t a) { return vreinterpret_bf16_f64(a); }
-// CHECK-LABEL: @test_vreinterpretq_bf16_f64(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <8 x bfloat>
-// CHECK-NEXT: ret <8 x bfloat> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vreinterpretq_bf16_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT: ret <8 x bfloat> [[TMP1]]
//
bfloat16x8_t test_vreinterpretq_bf16_f64(float64x2_t a) { return vreinterpretq_bf16_f64(a); }
-// CHECK-LABEL: @test_vreinterpret_s8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
int8x8_t test_vreinterpret_s8_bf16(bfloat16x4_t a) { return vreinterpret_s8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_s16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[TMP0]]
//
int16x4_t test_vreinterpret_s16_bf16(bfloat16x4_t a) { return vreinterpret_s16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_s32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x i32>
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <2 x i32>
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
//
int32x2_t test_vreinterpret_s32_bf16(bfloat16x4_t a) { return vreinterpret_s32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_f32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x float>
-// CHECK-NEXT: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
//
float32x2_t test_vreinterpret_f32_bf16(bfloat16x4_t a) { return vreinterpret_f32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_u8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
uint8x8_t test_vreinterpret_u8_bf16(bfloat16x4_t a) { return vreinterpret_u8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_u16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[TMP0]]
//
uint16x4_t test_vreinterpret_u16_bf16(bfloat16x4_t a) { return vreinterpret_u16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_u32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <2 x i32>
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <2 x i32>
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
//
uint32x2_t test_vreinterpret_u32_bf16(bfloat16x4_t a) { return vreinterpret_u32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_p8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
poly8x8_t test_vreinterpret_p8_bf16(bfloat16x4_t a) { return vreinterpret_p8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_p16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[TMP0]]
//
poly16x4_t test_vreinterpret_p16_bf16(bfloat16x4_t a) { return vreinterpret_p16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_u64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
uint64x1_t test_vreinterpret_u64_bf16(bfloat16x4_t a) { return vreinterpret_u64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_s64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
int64x1_t test_vreinterpret_s64_bf16(bfloat16x4_t a) { return vreinterpret_s64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_p64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
poly64x1_t test_vreinterpret_p64_bf16(bfloat16x4_t a) { return vreinterpret_p64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_s8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
int8x16_t test_vreinterpretq_s8_bf16(bfloat16x8_t a) { return vreinterpretq_s8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_s16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vreinterpretq_s16_bf16(bfloat16x8_t a) { return vreinterpretq_s16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_s32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x i32>
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vreinterpretq_s32_bf16(bfloat16x8_t a) { return vreinterpretq_s32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_f32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
//
float32x4_t test_vreinterpretq_f32_bf16(bfloat16x8_t a) { return vreinterpretq_f32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_u8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
uint8x16_t test_vreinterpretq_u8_bf16(bfloat16x8_t a) { return vreinterpretq_u8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_u16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vreinterpretq_u16_bf16(bfloat16x8_t a) { return vreinterpretq_u16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_u32_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <4 x i32>
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vreinterpretq_u32_bf16(bfloat16x8_t a) { return vreinterpretq_u32_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_p8_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
poly8x16_t test_vreinterpretq_p8_bf16(bfloat16x8_t a) { return vreinterpretq_p8_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_p16_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
poly16x8_t test_vreinterpretq_p16_bf16(bfloat16x8_t a) { return vreinterpretq_p16_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_u64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
uint64x2_t test_vreinterpretq_u64_bf16(bfloat16x8_t a) { return vreinterpretq_u64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_s64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
int64x2_t test_vreinterpretq_s64_bf16(bfloat16x8_t a) { return vreinterpretq_s64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_p64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x i64>
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
poly64x2_t test_vreinterpretq_p64_bf16(bfloat16x8_t a) { return vreinterpretq_p64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_p128_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to i128
+// CHECK-LABEL: define dso_local i128 @test_vreinterpretq_p128_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
//
poly128_t test_vreinterpretq_p128_bf16(bfloat16x8_t a) { return vreinterpretq_p128_bf16(a); }
-// CHECK-LABEL: @test_vreinterpret_f64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
//
float64x1_t test_vreinterpret_f64_bf16(bfloat16x4_t a) { return vreinterpret_f64_bf16(a); }
-// CHECK-LABEL: @test_vreinterpretq_f64_bf16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
//
float64x2_t test_vreinterpretq_f64_bf16(bfloat16x8_t a) { return vreinterpretq_f64_bf16(a); }
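Two shapes recur in the reinterpret checks above: same-width vector-to-vector casts become a pair of bitcasts through the integer-lane type, while the <1 x i64>-adjacent f64/u64/s64/p64 cases go through a scalar i64 and an insertelement into <1 x i64> (undef base), which is how SROA rewrites the copy. Reinterprets are bit-pattern preserving, so a round trip must be the identity; a sketch, assuming +bf16:

#include <arm_neon.h>

// Casting bf16 lanes to u16 and back returns the original bits unchanged.
bfloat16x4_t roundtrip_bf16(bfloat16x4_t v) {
  uint16x4_t bits = vreinterpret_u16_bf16(v);
  return vreinterpret_bf16_u16(bits);
}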
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 4305b840f2a05..9913c54567719 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -S -O3 -o /dev/null %s
@@ -274,19 +274,27 @@ mfloat8x16_t test_vcvt_high_mf8_f32_fpm(mfloat8x8_t vd, float32x4_t vn,
// CHECK-LABEL: define dso_local <8 x i8> @test_vcvt_mf8_f16_fpm(
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VM]] to <8 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VM]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VFCVTN2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.fp8.fcvtn.v8i8.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
+// CHECK-NEXT: [[VFCVTN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VFCVTN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VFCVTN2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.fp8.fcvtn.v8i8.v4f16(<4 x half> [[VFCVTN_I]], <4 x half> [[VFCVTN1_I]])
// CHECK-NEXT: ret <8 x i8> [[VFCVTN2_I]]
//
// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z21test_vcvt_mf8_f16_fpm13__Float16x4_tS_m(
// CHECK-CXX-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <8 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VM]] to <8 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VM]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VFCVTN2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.fp8.fcvtn.v8i8.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
+// CHECK-CXX-NEXT: [[VFCVTN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-CXX-NEXT: [[VFCVTN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-CXX-NEXT: [[VFCVTN2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.fp8.fcvtn.v8i8.v4f16(<4 x half> [[VFCVTN_I]], <4 x half> [[VFCVTN1_I]])
// CHECK-CXX-NEXT: ret <8 x i8> [[VFCVTN2_I]]
//
mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, float16x4_t vm, fpm_t fpm) {
@@ -296,19 +304,27 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, float16x4_t vm, fpm_t fpm) {
// CHECK-LABEL: define dso_local <16 x i8> @test_vcvtq_mf8_f16_fpm(
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VM]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VM]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VFCVTN2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
+// CHECK-NEXT: [[VFCVTN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VFCVTN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VFCVTN2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half> [[VFCVTN_I]], <8 x half> [[VFCVTN1_I]])
// CHECK-NEXT: ret <16 x i8> [[VFCVTN2_I]]
//
// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z22test_vcvtq_mf8_f16_fpm13__Float16x8_tS_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <16 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VM]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VM]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VFCVTN2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
+// CHECK-CXX-NEXT: [[VFCVTN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-CXX-NEXT: [[VFCVTN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-CXX-NEXT: [[VFCVTN2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half> [[VFCVTN_I]], <8 x half> [[VFCVTN1_I]])
// CHECK-CXX-NEXT: ret <16 x i8> [[VFCVTN2_I]]
//
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) {
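A short caller for the fcvtn checks, assuming +fp8 (plus +neon and +bf16 as in the RUN lines): the two f16 inputs are narrowed into one FP8 vector under the mode selected by the FPMR value:

#include <arm_neon.h>

// Narrows lo and hi to 8-bit floats; fpm carries the FP8 format/scaling state
// that the preceding llvm.aarch64.set.fpmr call installs.
mfloat8x8_t narrow_pair(float16x4_t lo, float16x4_t hi, fpm_t fpm) {
  return vcvt_mf8_f16_fpm(lo, hi, fpm);
}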
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 4d2f5d550c4dc..44db59df6c1c4 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -O3 -Werror -Wall -S -o /dev/null %s
@@ -12,17 +12,21 @@
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+// CHECK-NEXT: [[FDOT2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT2_I]], <8 x i8> [[VN]], <8 x i8> [[VM]])
// CHECK-NEXT: ret <4 x half> [[FDOT21_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z13test_vdot_f1613__Float16x4_t13__Mfloat8x8_tS0_m(
// CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]])
+// CHECK-CXX-NEXT: [[FDOT2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[FDOT2_I]], <8 x i8> [[VN]], <8 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <4 x half> [[FDOT21_I]]
//
float16x4_t test_vdot_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
@@ -32,17 +36,21 @@ float16x4_t test_vdot_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t
// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-NEXT: [[FDOT2_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[FDOT2_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[FDOT21_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z14test_vdotq_f1613__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-CXX-NEXT: [[FDOT2_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[FDOT2_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <8 x half> [[FDOT21_I]]
//
float16x8_t test_vdotq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
@@ -52,21 +60,23 @@ float16x8_t test_vdotq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_lane_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3)
+// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP2]], i32 3)
// CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z18test_vdot_lane_f1613__Float16x4_t13__Mfloat8x8_tS0_m(
// CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3)
+// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP2]], i32 3)
// CHECK-CXX-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
@@ -76,18 +86,20 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, f
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z19test_vdot_laneq_f1613__Float16x4_t13__Mfloat8x8_t14__Mfloat8x16_tm(
// CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-CXX-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
@@ -98,21 +110,23 @@ float16x4_t test_vdot_laneq_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm,
// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_lane_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3)
+// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 3)
// CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z19test_vdotq_lane_f1613__Float16x8_t14__Mfloat8x16_t13__Mfloat8x8_tm(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3)
+// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 3)
// CHECK-CXX-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
float16x8_t test_vdotq_lane_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpmr) {
@@ -122,18 +136,20 @@ float16x8_t test_vdotq_lane_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm,
// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z20test_vdotq_laneq_f1613__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-CXX-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index 736538073cb39..d4f074a92b05b 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
@@ -11,17 +11,21 @@
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-NEXT: [[VMLAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VMLAL_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalb13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-CXX-NEXT: [[VMLAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VMLAL_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
//
float16x8_t test_vmlalb(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
@@ -31,17 +35,21 @@ float16x8_t test_vmlalb(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalt(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-NEXT: [[VMLAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VMLAL_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalt13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
+// CHECK-CXX-NEXT: [[VMLAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VMLAL_I]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
//
float16x8_t test_vmlalt(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
@@ -123,21 +131,23 @@ float32x4_t test_vmlalltt(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_lane(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 0)
+// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 0)
// CHECK-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z16test_vmlalb_lane13__Float16x8_t14__Mfloat8x16_t13__Mfloat8x8_tm(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 0)
+// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 0)
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpm) {
@@ -147,18 +157,20 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fp
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z17test_vmlalb_laneq13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
@@ -169,21 +181,23 @@ float16x8_t test_vmlalb_laneq(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm,
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalt_lane(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 7)
+// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 7)
// CHECK-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z16test_vmlalt_lane13__Float16x8_t14__Mfloat8x16_t13__Mfloat8x8_tm(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0)
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 7)
+// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP2]], i32 7)
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
float16x8_t test_vmlalt_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpm) {
@@ -193,18 +207,20 @@ float16x8_t test_vmlalt_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fp
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalt_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 15)
// CHECK-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z17test_vmlalt_laneq13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
-// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-CXX-NEXT: [[VMLAL_LANE:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-CXX-NEXT: [[VMLAL_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.lane.v8f16(<8 x half> [[VMLAL_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 15)
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL_LANE1]]
//
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
index 201d4dbbe34ad..adf5fceb9ceb9 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
#include <arm_neon.h>
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
@@ -23,14 +23,16 @@ poly8x8_t test_vreinterpret_p8_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
// CHECK-CXX-LABEL: define dso_local noundef <1 x i64> @_Z25test_vreinterpret_p64_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-CXX-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-CXX-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
poly64x1_t test_vreinterpret_p64_mf8(mfloat8x8_t v) {
return vreinterpret_p64_mf8(v);
@@ -182,14 +184,16 @@ int8x16_t test_vreinterpretq_s8_mf8(mfloat8x16_t v) {
// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_mf8(
// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <2 x double> @_Z26test_vreinterpretq_f64_mf814__Mfloat8x16_t(
// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <2 x double>
-// CHECK-CXX-NEXT: ret <2 x double> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <2 x i64>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-CXX-NEXT: ret <2 x double> [[TMP1]]
//
float64x2_t test_vreinterpretq_f64_mf8(mfloat8x16_t v) {
return vreinterpretq_f64_mf8(v);
@@ -197,14 +201,16 @@ float64x2_t test_vreinterpretq_f64_mf8(mfloat8x16_t v) {
// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_mf8(
// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z26test_vreinterpretq_f32_mf814__Mfloat8x16_t(
// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <4 x float>
-// CHECK-CXX-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <4 x i32>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-CXX-NEXT: ret <4 x float> [[TMP1]]
//
float32x4_t test_vreinterpretq_f32_mf8(mfloat8x16_t v) {
return vreinterpretq_f32_mf8(v);
@@ -212,14 +218,16 @@ float32x4_t test_vreinterpretq_f32_mf8(mfloat8x16_t v) {
// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_mf8(
// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z26test_vreinterpretq_f16_mf814__Mfloat8x16_t(
// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <8 x half>
-// CHECK-CXX-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[V]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-CXX-NEXT: ret <8 x half> [[TMP1]]
//
float16x8_t test_vreinterpretq_f16_mf8(mfloat8x16_t v) {
return vreinterpretq_f16_mf8(v);
@@ -386,14 +394,16 @@ mfloat8x16_t test_vreinterpretq_mf8_s8(int8x16_t v) {
// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_mf8_f64(
// CHECK-SAME: <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z26test_vreinterpretq_mf8_f6413__Float64x2_t(
// CHECK-CXX-SAME: <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
-// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: ret <16 x i8> [[TMP1]]
//
mfloat8x16_t test_vreinterpretq_mf8_f64(float64x2_t v) {
return vreinterpretq_mf8_f64(v);
@@ -401,14 +411,16 @@ mfloat8x16_t test_vreinterpretq_mf8_f64(float64x2_t v) {
// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_mf8_f32(
// CHECK-SAME: <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z26test_vreinterpretq_mf8_f3213__Float32x4_t(
// CHECK-CXX-SAME: <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: ret <16 x i8> [[TMP1]]
//
mfloat8x16_t test_vreinterpretq_mf8_f32(float32x4_t v) {
return vreinterpretq_mf8_f32(v);
@@ -416,14 +428,16 @@ mfloat8x16_t test_vreinterpretq_mf8_f32(float32x4_t v) {
// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_mf8_f16(
// CHECK-SAME: <8 x half> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[V]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[V]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z26test_vreinterpretq_mf8_f1613__Float16x8_t(
// CHECK-CXX-SAME: <8 x half> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[V]] to <16 x i8>
-// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[V]] to <8 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-CXX-NEXT: ret <16 x i8> [[TMP1]]
//
mfloat8x16_t test_vreinterpretq_mf8_f16(float16x8_t v) {
return vreinterpretq_mf8_f16(v);
@@ -519,14 +533,16 @@ uint32x2_t test_vreinterpret_u32_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
// CHECK-CXX-LABEL: define dso_local noundef <1 x i64> @_Z25test_vreinterpret_u64_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-CXX-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-CXX-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
uint64x1_t test_vreinterpret_u64_mf8(mfloat8x8_t v) {
return vreinterpret_u64_mf8(v);
@@ -562,14 +578,18 @@ int8x8_t test_vreinterpret_s8_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <1 x double> @_Z25test_vreinterpret_f64_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x double>
-// CHECK-CXX-NEXT: ret <1 x double> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-CXX-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-CXX-NEXT: ret <1 x double> [[TMP1]]
//
float64x1_t test_vreinterpret_f64_mf8(mfloat8x8_t v) {
return vreinterpret_f64_mf8(v);
@@ -577,14 +597,16 @@ float64x1_t test_vreinterpret_f64_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <2 x float>
-// CHECK-NEXT: ret <2 x float> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <2 x float> @_Z25test_vreinterpret_f32_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <2 x float>
-// CHECK-CXX-NEXT: ret <2 x float> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <2 x i32>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-CXX-NEXT: ret <2 x float> [[TMP1]]
//
float32x2_t test_vreinterpret_f32_mf8(mfloat8x8_t v) {
return vreinterpret_f32_mf8(v);
@@ -592,14 +614,16 @@ float32x2_t test_vreinterpret_f32_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <4 x half>
-// CHECK-NEXT: ret <4 x half> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z25test_vreinterpret_f16_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <4 x half>
-// CHECK-CXX-NEXT: ret <4 x half> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-CXX-NEXT: ret <4 x half> [[TMP1]]
//
float16x4_t test_vreinterpret_f16_mf8(mfloat8x8_t v) {
return vreinterpret_f16_mf8(v);
@@ -622,14 +646,16 @@ int32x2_t test_vreinterpret_s32_mf8(mfloat8x8_t v) {
// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_mf8(
// CHECK-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
// CHECK-CXX-LABEL: define dso_local noundef <1 x i64> @_Z25test_vreinterpret_s64_mf813__Mfloat8x8_t(
// CHECK-CXX-SAME: <8 x i8> [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to <1 x i64>
-// CHECK-CXX-NEXT: ret <1 x i64> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
+// CHECK-CXX-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
//
int64x1_t test_vreinterpret_s64_mf8(mfloat8x8_t v) {
return vreinterpret_s64_mf8(v);
@@ -751,14 +777,18 @@ mfloat8x8_t test_vreinterpret_mf8_s8(int8x8_t v) {
// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_mf8_f64(
// CHECK-SAME: <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z25test_vreinterpret_mf8_f6413__Float64x1_t(
// CHECK-CXX-SAME: <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
-// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-CXX-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-CXX-NEXT: ret <8 x i8> [[TMP1]]
//
mfloat8x8_t test_vreinterpret_mf8_f64(float64x1_t v) {
return vreinterpret_mf8_f64(v);
@@ -766,14 +796,16 @@ mfloat8x8_t test_vreinterpret_mf8_f64(float64x1_t v) {
// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_mf8_f32(
// CHECK-SAME: <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z25test_vreinterpret_mf8_f3213__Float32x2_t(
// CHECK-CXX-SAME: <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-CXX-NEXT: ret <8 x i8> [[TMP1]]
//
mfloat8x8_t test_vreinterpret_mf8_f32(float32x2_t v) {
return vreinterpret_mf8_f32(v);
@@ -781,14 +813,16 @@ mfloat8x8_t test_vreinterpret_mf8_f32(float32x2_t v) {
// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_mf8_f16(
// CHECK-SAME: <4 x half> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[V]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[V]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
//
// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z25test_vreinterpret_mf8_f1613__Float16x4_t(
// CHECK-CXX-SAME: <4 x half> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[V]] to <8 x i8>
-// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[V]] to <4 x i16>
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-CXX-NEXT: ret <8 x i8> [[TMP1]]
//
mfloat8x8_t test_vreinterpret_mf8_f16(float16x4_t v) {
return vreinterpret_mf8_f16(v);
diff --git a/clang/test/CodeGen/AArch64/neon-2velem.c b/clang/test/CodeGen/AArch64/neon-2velem.c
index 75bdeb92fd9ca..2bc7212cde9f8 100644
--- a/clang/test/CodeGen/AArch64/neon-2velem.c
+++ b/clang/test/CodeGen/AArch64/neon-2velem.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -407,13 +407,16 @@ uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
// CHECK-LABEL: @test_vfma_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT: ret <2 x float> [[FMLA2]]
//
@@ -423,13 +426,16 @@ float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfmaq_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT: ret <4 x float> [[FMLA2]]
//
@@ -439,15 +445,18 @@ float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfma_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
-// CHECK-NEXT: ret <2 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP7]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfma_laneq_f32(a, b, v, 3);
@@ -455,15 +464,18 @@ float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmaq_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
-// CHECK-NEXT: ret <4 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP7]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmaq_laneq_f32(a, b, v, 3);
@@ -471,14 +483,17 @@ float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfms_lane_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT: ret <2 x float> [[FMLA2]]
//
@@ -488,14 +503,17 @@ float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfmsq_lane_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT: ret <4 x float> [[FMLA2]]
//
@@ -505,16 +523,19 @@ float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfms_laneq_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
-// CHECK-NEXT: ret <2 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP7]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfms_laneq_f32(a, b, v, 3);
@@ -522,16 +543,19 @@ float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmsq_laneq_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
-// CHECK-NEXT: ret <4 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP7]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmsq_laneq_f32(a, b, v, 3);
@@ -539,13 +563,17 @@ float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmaq_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
// CHECK-NEXT: ret <2 x double> [[FMLA2]]
//
@@ -555,15 +583,18 @@ float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
// CHECK-LABEL: @test_vfmaq_laneq_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
-// CHECK-NEXT: ret <2 x double> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP8]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP7]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmaq_laneq_f64(a, b, v, 1);
@@ -571,14 +602,18 @@ float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
// CHECK-LABEL: @test_vfmsq_lane_f64(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
// CHECK-NEXT: ret <2 x double> [[FMLA2]]
//
@@ -588,16 +623,19 @@ float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
// CHECK-LABEL: @test_vfmsq_laneq_f64(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
-// CHECK-NEXT: ret <2 x double> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP8]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP7]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmsq_laneq_f64(a, b, v, 1);
@@ -653,7 +691,9 @@ float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -668,7 +708,9 @@ int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -683,7 +725,9 @@ int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -698,7 +742,9 @@ int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -714,7 +760,9 @@ int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -730,7 +778,9 @@ int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -746,7 +796,9 @@ int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -762,7 +814,9 @@ int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -777,7 +831,9 @@ int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -792,7 +848,9 @@ int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -807,7 +865,9 @@ int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -822,7 +882,9 @@ int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -838,7 +900,9 @@ int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -854,7 +918,9 @@ int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -870,7 +936,9 @@ int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -886,7 +954,9 @@ int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -901,7 +971,9 @@ int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -916,7 +988,9 @@ int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -931,7 +1005,9 @@ int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -946,7 +1022,9 @@ int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -962,7 +1040,9 @@ int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -978,7 +1058,9 @@ int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -994,7 +1076,9 @@ int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -1010,7 +1094,9 @@ int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -1025,7 +1111,9 @@ int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -1040,7 +1128,9 @@ int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -1055,7 +1145,9 @@ int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -1070,7 +1162,9 @@ int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -1086,7 +1180,9 @@ int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -1102,7 +1198,9 @@ int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -1118,7 +1216,9 @@ int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -1134,7 +1234,9 @@ int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -1149,7 +1251,9 @@ int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) {
@@ -1163,7 +1267,9 @@ int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) {
@@ -1177,7 +1283,9 @@ int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) {
@@ -1191,7 +1299,9 @@ uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) {
@@ -1206,7 +1316,9 @@ uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) {
@@ -1221,7 +1333,9 @@ int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) {
@@ -1236,7 +1350,9 @@ int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) {
@@ -1251,7 +1367,9 @@ uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) {
@@ -1265,7 +1383,9 @@ uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) {
@@ -1279,7 +1399,9 @@ int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) {
@@ -1293,7 +1415,9 @@ int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) {
@@ -1307,7 +1431,9 @@ uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) {
@@ -1322,7 +1448,9 @@ uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
@@ -1337,7 +1465,9 @@ int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
@@ -1352,7 +1482,9 @@ int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) {
@@ -1367,7 +1499,9 @@ uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) {
@@ -1382,8 +1516,11 @@ uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
@@ -1398,8 +1535,11 @@ int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -1415,8 +1555,11 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
@@ -1432,8 +1575,11 @@ int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -1448,8 +1594,11 @@ int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
@@ -1464,8 +1613,11 @@ int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -1481,8 +1633,11 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
@@ -1498,8 +1653,11 @@ int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -1513,9 +1671,12 @@ int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) {
return vqdmull_lane_s16(a, v, 3);
@@ -1528,9 +1689,12 @@ int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) {
return vqdmull_lane_s32(a, v, 1);
@@ -1543,9 +1707,12 @@ int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) {
return vqdmull_laneq_s16(a, v, 3);
@@ -1558,9 +1725,12 @@ int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) {
return vqdmull_laneq_s32(a, v, 3);
@@ -1574,9 +1744,12 @@ int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) {
return vqdmull_high_lane_s16(a, v, 3);
@@ -1590,9 +1763,12 @@ int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) {
return vqdmull_high_lane_s32(a, v, 1);
@@ -1606,9 +1782,12 @@ int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
return vqdmull_high_laneq_s16(a, v, 7);
@@ -1622,9 +1801,12 @@ int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) {
return vqdmull_high_laneq_s32(a, v, 3);
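
For context: the SHUFFLE_I values in the _high variants above extract the top half of the 128-bit input; after that the lowering matches the plain lane forms. A hedged equivalence sketch (the helper name is illustrative):

  #include <arm_neon.h>

  /* vqdmull_high_lane_s16(a, v, 3) behaves like splitting off the upper
     half of a and then doing the ordinary lane multiply. */
  int32x4_t high_lane_equiv(int16x8_t a, int16x4_t v) {
    return vqdmull_lane_s16(vget_high_s16(a), v, 3);
  }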
@@ -1736,9 +1918,10 @@ int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
// CHECK-LABEL: @test_vmul_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x float> [[MUL]]
//
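
For context: the recurring new pattern in the floating-point tests is a round trip through a same-width integer vector before the existing byte-level cast, which appears to be what the bitcast-based emission produces for floating-point element types. A rough source-level picture of the three casts using standard vreinterpret intrinsics (illustrative only; the header code itself is generated):

  #include <arm_neon.h>

  float32x2_t roundtrip(float32x2_t v) {
    uint32x2_t as_u32 = vreinterpret_u32_f32(v);     /* <2 x float> -> <2 x i32>  */
    uint8x8_t  as_u8  = vreinterpret_u8_u32(as_u32); /* <2 x i32>   -> <8 x i8>   */
    return vreinterpret_f32_u8(as_u8);               /* <8 x i8>    -> <2 x float> */
  }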
@@ -1749,14 +1932,18 @@ float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmul_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP5]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to double
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP5]], i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = fmul double [[TMP4]], [[EXTRACT]]
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP7]]
//
float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
return vmul_lane_f64(a, v, 0);
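
For context: with a single 64-bit lane the splat is a no-op, so vmul_lane_f64 reduces to a scalar fmul; the extra i64 insertelement steps in the new checks come from SROA of the argument copies (hence the __S0_SROA names), not from the multiply itself. A hedged equivalence sketch:

  #include <arm_neon.h>

  /* Lane 0 of a 1-lane vector is the whole vector. */
  float64x1_t mul_lane0_equiv(float64x1_t a, float64x1_t v) {
    return vmul_f64(a, v);
  }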
@@ -1764,9 +1951,10 @@ float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
// CHECK-LABEL: @test_vmulq_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <4 x float> [[MUL]]
//
@@ -1776,9 +1964,11 @@ float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulq_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP2]], <1 x double> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x double> [[MUL]]
//
@@ -1788,9 +1978,10 @@ float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) {
// CHECK-LABEL: @test_vmul_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x float> [[MUL]]
//
@@ -1800,14 +1991,17 @@ float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmul_laneq_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
-// CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP5]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to double
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+// CHECK-NEXT: [[TMP6:%.*]] = fmul double [[TMP4]], [[EXTRACT]]
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP7]]
//
float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) {
return vmul_laneq_f64(a, v, 1);
@@ -1815,9 +2009,10 @@ float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) {
// CHECK-LABEL: @test_vmulq_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <4 x float> [[MUL]]
//
@@ -1827,9 +2022,10 @@ float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulq_laneq_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP2]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x double> [[MUL]]
//
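
For context on the fmulx calls checked below: FMULX matches an ordinary multiply except when one operand is infinite and the other is zero. A minimal scalar model, assuming the usual AArch64 semantics:

  #include <math.h>

  /* Like fmul, except +/-inf * +/-0 yields +/-2.0 (sign is the XOR of
     the operand signs) instead of NaN. */
  static double ref_fmulx(double x, double y) {
    if ((isinf(x) && y == 0.0) || (x == 0.0 && isinf(y)))
      return (signbit(x) != signbit(y)) ? -2.0 : 2.0;
    return x * y;
  }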
@@ -1839,12 +2035,17 @@ float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) {
// CHECK-LABEL: @test_vmulx_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x float> [[VMULX2_I]]
//
float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) {
@@ -1853,12 +2054,17 @@ float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulxq_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x float> [[VMULX2_I]]
//
float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) {
@@ -1867,12 +2073,18 @@ float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulxq_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP2]], <1 x double> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[LANE]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x double> [[VMULX2_I]]
//
float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) {
@@ -1881,12 +2093,17 @@ float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) {
// CHECK-LABEL: @test_vmulx_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x float> [[VMULX2_I]]
//
float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) {
@@ -1895,12 +2112,17 @@ float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulxq_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x float> [[VMULX2_I]]
//
float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) {
@@ -1909,12 +2131,17 @@ float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulxq_laneq_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[LANE]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x double> [[VMULX2_I]]
//
float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) {
@@ -2323,13 +2550,16 @@ uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
// CHECK-LABEL: @test_vfma_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT: ret <2 x float> [[FMLA2]]
//
@@ -2339,13 +2569,16 @@ float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfmaq_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT: ret <4 x float> [[FMLA2]]
//
@@ -2355,15 +2588,18 @@ float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfma_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
-// CHECK-NEXT: ret <2 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP7]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfma_laneq_f32(a, b, v, 0);
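
For context: the llvm.fma calls in these _0 tests fuse a + b * v[lane] per element. A hedged equivalence sketch using the scalar-broadcast form (the helper name is illustrative):

  #include <arm_neon.h>

  float32x2_t fma_lane0_equiv(float32x2_t a, float32x2_t b, float32x4_t v) {
    /* a + b * v[0], fused, matching the @llvm.fma.v2f32 call above */
    return vfma_n_f32(a, b, vgetq_lane_f32(v, 0));
  }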
@@ -2371,15 +2607,18 @@ float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmaq_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
-// CHECK-NEXT: ret <4 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP7]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmaq_laneq_f32(a, b, v, 0);
@@ -2387,14 +2626,17 @@ float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
// CHECK-LABEL: @test_vfms_lane_f32_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT: ret <2 x float> [[FMLA2]]
//
@@ -2404,14 +2646,17 @@ float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfmsq_lane_f32_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT: ret <4 x float> [[FMLA2]]
//
@@ -2421,16 +2666,19 @@ float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: @test_vfms_laneq_f32_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
-// CHECK-NEXT: ret <2 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP7]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfms_laneq_f32(a, b, v, 0);
@@ -2438,16 +2686,19 @@ float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmsq_laneq_f32_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
-// CHECK-NEXT: ret <4 x float> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP8]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP7]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmsq_laneq_f32(a, b, v, 0);
@@ -2455,15 +2706,18 @@ float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
// CHECK-LABEL: @test_vfmaq_laneq_f64_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
-// CHECK-NEXT: ret <2 x double> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP8]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP7]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmaq_laneq_f64(a, b, v, 0);
@@ -2471,16 +2725,19 @@ float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v)
// CHECK-LABEL: @test_vfmsq_laneq_f64_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[B:%.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]])
-// CHECK-NEXT: ret <2 x double> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP8]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP7]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmsq_laneq_f64(a, b, v, 0);
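
For context: the FNEG feeding llvm.fma in the vfms checks is the entire difference between vfms and vfma. Sketch of the identity:

  #include <arm_neon.h>

  /* vfmsq_f64(a, b, v) == vfmaq_f64(a, -b, v), i.e. a - b * v per lane */
  float64x2_t fms_equiv(float64x2_t a, float64x2_t b, float64x2_t v) {
    return vfmaq_f64(a, vnegq_f64(b), v);
  }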
@@ -2493,7 +2750,9 @@ float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v)
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
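
For context: the smull-then-add (and later smull-then-sub) pairs in these hunks are the expansion of the widening multiply-accumulate intrinsics. A hedged sketch of the equivalence for the first test:

  #include <arm_neon.h>

  /* vmlal_lane_s16(a, b, v, 0) == a + vmull_lane_s16(b, v, 0) */
  int32x4_t mlal_lane0_equiv(int32x4_t a, int16x4_t b, int16x4_t v) {
    return vaddq_s32(a, vmull_lane_s16(b, v, 0));
  }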
@@ -2508,7 +2767,9 @@ int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2523,7 +2784,9 @@ int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2538,7 +2801,9 @@ int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2554,7 +2819,9 @@ int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2570,7 +2837,9 @@ int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2586,7 +2855,9 @@ int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2602,7 +2873,9 @@ int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2617,7 +2890,9 @@ int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2632,7 +2907,9 @@ int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2647,7 +2924,9 @@ int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2662,7 +2941,9 @@ int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2678,7 +2959,9 @@ int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2694,7 +2977,9 @@ int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2710,7 +2995,9 @@ int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2726,7 +3013,9 @@ int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2741,7 +3030,9 @@ int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2756,7 +3047,9 @@ int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2771,7 +3064,9 @@ int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2786,7 +3081,9 @@ int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2802,7 +3099,9 @@ int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2818,7 +3117,9 @@ int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2834,7 +3135,9 @@ int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD]]
//
@@ -2850,7 +3153,9 @@ int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD]]
//
@@ -2865,7 +3170,9 @@ int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2880,7 +3187,9 @@ int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2895,7 +3204,9 @@ int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2910,7 +3221,9 @@ int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2926,7 +3239,9 @@ int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2942,7 +3257,9 @@ int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2958,7 +3275,9 @@ int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB]]
//
@@ -2974,7 +3293,9 @@ int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB]]
//
@@ -2989,7 +3310,9 @@ int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) {
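// NOTE: the plain vmull lane/laneq hunks (including _high) change only the
// two multiplicands in the same way; the returned [[VMULL2_I]] is untouched.
// Illustrative sketch (helper name assumed, not from the patch):
//
//   #include <arm_neon.h>
//   int64x2_t widen_mul(int32x2_t a, int32x2_t v) {
//     return vmull_lane_s32(a, v, 0); // ret <2 x i64> [[VMULL2_I]] as before
//   }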
@@ -3003,7 +3326,9 @@ int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) {
@@ -3017,7 +3342,9 @@ int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) {
@@ -3031,7 +3358,9 @@ uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) {
@@ -3046,7 +3375,9 @@ uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
@@ -3061,7 +3392,9 @@ int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
@@ -3076,7 +3409,9 @@ int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) {
@@ -3091,7 +3426,9 @@ uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) {
@@ -3105,7 +3442,9 @@ uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
@@ -3119,7 +3458,9 @@ int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
@@ -3133,7 +3474,9 @@ int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
@@ -3147,7 +3490,9 @@ uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
@@ -3162,7 +3507,9 @@ uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
@@ -3177,7 +3524,9 @@ int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
@@ -3192,7 +3541,9 @@ int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
//
uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
@@ -3207,7 +3558,9 @@ uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
//
uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
@@ -3222,8 +3575,11 @@ uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
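// NOTE: for the saturating doubling multiply-accumulate hunks the accumulator
// changes too: the first sqadd operand is now [[VQDMLAL_V_I]], i.e. [[A]]
// recovered from its <16 x i8> bitcast, next to the bitcast sqdmull operands;
// the vqdmlsl hunks further down feed [[VQDMLSL_V_I]] to sqsub the same way.
// Illustrative sketch (helper name assumed, not from the patch):
//
//   #include <arm_neon.h>
//   int32x4_t sat_mla(int32x4_t acc, int16x4_t b, int16x4_t v) {
//     return vqdmlal_lane_s16(acc, b, v, 0); // sqdmull then sqadd on bitcasts
//   }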
@@ -3238,8 +3594,11 @@ int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -3255,8 +3614,11 @@ int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
@@ -3272,8 +3634,11 @@ int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -3288,8 +3653,11 @@ int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
@@ -3304,8 +3672,11 @@ int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
@@ -3321,8 +3692,11 @@ int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
@@ -3338,8 +3712,11 @@ int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
@@ -3353,9 +3730,12 @@ int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) {
return vqdmull_lane_s16(a, v, 0);
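// NOTE: the vqdmull lane hunks add one step at the tail: the old checks
// returned [[VQDMULL_V2_I]] directly and left the [[VQDMULL_V3_I]] bitcast
// dead, while the new ones return [[TMP4]], the <16 x i8> value bitcast back
// to the result type. Illustrative sketch (helper name assumed):
//
//   #include <arm_neon.h>
//   int64x2_t sat_widen_mul(int32x2_t a, int32x2_t v) {
//     return vqdmull_lane_s32(a, v, 0); // result round-trips via <16 x i8>
//   }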
@@ -3368,9 +3748,12 @@ int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) {
return vqdmull_lane_s32(a, v, 0);
@@ -3383,9 +3766,12 @@ int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
return vqdmull_laneq_s16(a, v, 0);
@@ -3398,9 +3784,12 @@ int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
return vqdmull_laneq_s32(a, v, 0);
@@ -3414,9 +3803,12 @@ int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
return vqdmull_high_lane_s16(a, v, 0);
@@ -3430,9 +3822,12 @@ int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
return vqdmull_high_lane_s32(a, v, 0);
@@ -3446,9 +3841,12 @@ int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
return vqdmull_high_laneq_s16(a, v, 0);
@@ -3462,9 +3860,12 @@ int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) {
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
//
int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) {
return vqdmull_high_laneq_s32(a, v, 0);
@@ -3576,9 +3977,10 @@ int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) {
// CHECK-LABEL: @test_vmul_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x float> [[MUL]]
//
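// NOTE: the floating-point vmul lane hunks gain one extra cast in front: the
// lane vector is first bitcast to the same-width integer vector (<2 x float>
// to <2 x i32>, <4 x float> to <4 x i32>, <2 x double> to <2 x i64>) before
// the byte-vector round trip and the shuffle. Illustrative sketch (helper
// name assumed, not from the patch):
//
//   #include <arm_neon.h>
//   float32x2_t lane_mul(float32x2_t a, float32x2_t v) {
//     return vmul_lane_f32(a, v, 0); // fmul on the re-materialized lane splat
//   }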
@@ -3588,9 +3990,10 @@ float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulq_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <4 x float> [[MUL]]
//
@@ -3600,9 +4003,10 @@ float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmul_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x float> [[MUL]]
//
@@ -3612,14 +4016,17 @@ float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmul_laneq_f64_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double>
-// CHECK-NEXT: ret <1 x double> [[TMP5]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to double
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = fmul double [[TMP4]], [[EXTRACT]]
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP7]]
//
float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
return vmul_laneq_f64(a, v, 0);
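// NOTE: the <1 x double> operand is the outlier: it is bitcast to a plain
// i64 and rebuilt with insertelement ([[__S0_SROA_0_0_VEC_INSERT]]) before
// the usual byte-vector casts, and the multiply itself stays a scalar fmul
// on double. Illustrative sketch (helper name assumed, not from the patch):
//
//   #include <arm_neon.h>
//   float64x1_t lane_mul_d(float64x1_t a, float64x2_t v) {
//     return vmul_laneq_f64(a, v, 0); // scalar fmul, result bitcast back
//   }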
@@ -3627,9 +4034,10 @@ float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
// CHECK-LABEL: @test_vmulq_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <4 x float> [[MUL]]
//
@@ -3639,9 +4047,10 @@ float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulq_laneq_f64_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]]
// CHECK-NEXT: ret <2 x double> [[MUL]]
//
@@ -3651,12 +4060,17 @@ float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) {
// CHECK-LABEL: @test_vmulx_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x float> [[VMULX2_I]]
//
float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) {
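// NOTE: vmulx applies the same scheme to both operands: [[A]] and [[LANE]]
// each go float vector -> same-width integer vector -> byte vector -> float
// vector ([[VMULX_I]]/[[VMULX1_I]]) before the fmulx intrinsic call.
// Illustrative sketch (helper name assumed, not from the patch):
//
//   #include <arm_neon.h>
//   float32x2_t lane_mulx(float32x2_t a, float32x2_t v) {
//     return vmulx_lane_f32(a, v, 0); // fmulx consumes the bitcast operands
//   }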
@@ -3665,12 +4079,17 @@ float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulxq_lane_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x float> [[VMULX2_I]]
//
float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) {
@@ -3679,12 +4098,18 @@ float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) {
// CHECK-LABEL: @test_vmulxq_lane_f64_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP2]], <1 x double> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[LANE]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x double> [[VMULX2_I]]
//
float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) {
@@ -3693,12 +4118,17 @@ float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) {
// CHECK-LABEL: @test_vmulx_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x float> [[VMULX2_I]]
//
float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) {
@@ -3707,12 +4137,17 @@ float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulxq_laneq_f32_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x float> [[VMULX2_I]]
//
float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) {
@@ -3721,12 +4156,17 @@ float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) {
// CHECK-LABEL: @test_vmulxq_laneq_f64_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[LANE]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]])
// CHECK-NEXT: ret <2 x double> [[VMULX2_I]]
//
float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) {
@@ -3742,7 +4182,9 @@ float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
//
int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
@@ -3756,7 +4198,9 @@ int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
//
int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
@@ -3772,7 +4216,9 @@ int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
//
uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
@@ -3786,7 +4232,9 @@ uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
//
uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
@@ -3802,9 +4250,12 @@ uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]])
// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
return vqdmull_high_n_s16(a, b);
@@ -3817,9 +4268,12 @@ int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]])
// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
//
int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
return vqdmull_high_n_s32(a, b);
@@ -3834,7 +4288,9 @@ int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
@@ -3849,7 +4305,9 @@ int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
//
@@ -3866,7 +4324,9 @@ int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
@@ -3881,7 +4341,9 @@ uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
//
@@ -3899,8 +4361,11 @@ uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
-// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]]
//
int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
@@ -3915,8 +4380,11 @@ int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
-// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]]
//
int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
@@ -3932,7 +4400,9 @@ int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
@@ -3947,7 +4417,9 @@ int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
//
@@ -3964,7 +4436,9 @@ int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
@@ -3979,7 +4453,9 @@ uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
//
@@ -3997,8 +4473,11 @@ uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]])
-// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]]
//
int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
@@ -4013,8 +4492,11 @@ int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]])
-// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]]
//
int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
@@ -4060,11 +4542,17 @@ float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[N:%.*]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[N]], i32 1
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[B]], <2 x float> [[VECINIT1_I]], <2 x float> [[A]])
-// CHECK-NEXT: ret <2 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
return vfma_n_f32(a, b, n);
@@ -4073,11 +4561,20 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
// CHECK-LABEL: @test_vfma_n_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x double> poison, double [[N:%.*]], i32 0
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[B]], <1 x double> [[VECINIT_I]], <1 x double> [[A]])
-// CHECK-NEXT: ret <1 x double> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B:%.*]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to i64
+// CHECK-NEXT: [[__P2_ADDR_I3_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I3_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// CHECK-NEXT: ret <1 x double> [[TMP9]]
//
float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
return vfma_n_f64(a, b, n);
@@ -4089,11 +4586,17 @@ float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[N]], i32 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[N]], i32 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[N]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[VECINIT3_I]], <4 x float> [[A]])
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
return vfmaq_n_f32(a, b, n);
@@ -4104,11 +4607,17 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[B:%.*]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[N:%.*]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[N]], i32 1
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FNEG_I]], <2 x float> [[VECINIT1_I]], <2 x float> [[A]])
-// CHECK-NEXT: ret <2 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
return vfms_n_f32(a, b, n);
@@ -4118,11 +4627,20 @@ float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[B:%.*]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x double> poison, double [[N:%.*]], i32 0
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FNEG_I]], <1 x double> [[VECINIT_I]], <1 x double> [[A]])
-// CHECK-NEXT: ret <1 x double> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to i64
+// CHECK-NEXT: [[__P2_ADDR_I3_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I3_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// CHECK-NEXT: ret <1 x double> [[TMP9]]
//
float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
return vfms_n_f64(a, b, n);
@@ -4135,11 +4653,17 @@ float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[N]], i32 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[N]], i32 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[N]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FNEG_I]], <4 x float> [[VECINIT3_I]], <4 x float> [[A]])
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
return vfmsq_n_f32(a, b, n);
@@ -4261,7 +4785,9 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
//
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
@@ -4274,7 +4800,9 @@ int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
//
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
@@ -4289,7 +4817,9 @@ int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
//
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
@@ -4302,7 +4832,9 @@ uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
//
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
@@ -4317,9 +4849,12 @@ uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]])
// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
return vqdmull_n_s16(a, b);
@@ -4331,9 +4866,12 @@ int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]])
// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
//
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
return vqdmull_n_s32(a, b);
@@ -4347,9 +4885,12 @@ int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]])
// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <4 x i16> [[VQDMULH_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
//
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
return vqdmulh_n_s16(a, b);
@@ -4367,9 +4908,12 @@ int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[VECINIT7_I]])
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]])
// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqdmulhq_n_s16(a, b);
@@ -4381,9 +4925,12 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]])
// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <2 x i32> [[VQDMULH_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
//
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
return vqdmulh_n_s32(a, b);
@@ -4397,9 +4944,12 @@ int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]])
// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqdmulhq_n_s32(a, b);
@@ -4413,9 +4963,12 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]])
// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <4 x i16> [[VQRDMULH_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
//
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
return vqrdmulh_n_s16(a, b);
@@ -4433,9 +4986,12 @@ int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[VECINIT7_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]])
// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqrdmulhq_n_s16(a, b);
@@ -4447,9 +5003,12 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]])
// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <2 x i32> [[VQRDMULH_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
//
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
return vqrdmulh_n_s32(a, b);
@@ -4463,9 +5022,12 @@ int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[VECINIT3_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]])
// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x i32> [[VQRDMULHQ_V2_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqrdmulhq_n_s32(a, b);
@@ -4595,7 +5157,9 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
@@ -4609,7 +5173,9 @@ int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
//
@@ -4625,7 +5191,9 @@ int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
@@ -4639,7 +5207,9 @@ uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
//
@@ -4656,8 +5226,11 @@ uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
-// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]]
//
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
@@ -4671,8 +5244,11 @@ int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
-// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]]
//
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
@@ -4803,7 +5379,9 @@ uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
@@ -4817,7 +5395,9 @@ int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
//
@@ -4833,7 +5413,9 @@ int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
@@ -4847,7 +5429,9 @@ uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]]
// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
//
@@ -4864,8 +5448,11 @@ uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]])
-// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]]
//
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
@@ -4879,8 +5466,11 @@ int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]])
-// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]]
//
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
@@ -4999,8 +5589,11 @@ uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
@@ -5015,8 +5608,11 @@ int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
@@ -5032,8 +5628,11 @@ int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
@@ -5049,8 +5648,11 @@ int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
@@ -5169,8 +5771,11 @@ uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
@@ -5185,8 +5790,11 @@ int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
@@ -5202,8 +5810,11 @@ int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
@@ -5219,8 +5830,11 @@ int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) {
@@ -5443,8 +6057,11 @@ uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
@@ -5459,8 +6076,11 @@ int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
@@ -5476,8 +6096,11 @@ int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
//
int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
@@ -5493,8 +6116,11 @@ int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
//
int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
@@ -5613,8 +6239,11 @@ uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
@@ -5629,8 +6258,11 @@ int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
@@ -5646,8 +6278,11 @@ int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
//
int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
@@ -5663,8 +6298,11 @@ int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]])
-// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
//
int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) {
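
Note for reviewers skimming the regenerated checks above: every updated vqdmlal/vqdmlsl hunk follows the same shape — each multiplicand is round-tripped through a byte vector (<8 x i8>, or <16 x i8> for the accumulator) and the intrinsic calls consume the bitcast results rather than the original SSA values. A minimal self-contained IR sketch of that shape for the v4i32 case is below; the function and value names are illustrative, not taken from any test.

; Hedged sketch of the regenerated check shape (illustrative names).
define <4 x i32> @sqdmlsl_shape(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
  ; all three operands are lowered to their byte view first
  %a.bytes = bitcast <4 x i32> %a to <16 x i8>
  %b.bytes = bitcast <4 x i16> %b to <8 x i8>
  %c.bytes = bitcast <4 x i16> %c to <8 x i8>
  ; the multiplicands are re-materialised from the byte view ...
  %lhs = bitcast <8 x i8> %b.bytes to <4 x i16>
  %rhs = bitcast <8 x i8> %c.bytes to <4 x i16>
  %dmull = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
  ; ... and so is the accumulator, before the saturating subtract
  %acc = bitcast <16 x i8> %a.bytes to <4 x i32>
  %res = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %dmull)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

The bitcast chain is a bit-identity, so later passes are free to fold it; the intrinsic declarations match the signatures visible in the checks, while everything else in the sketch is assumed for illustration.
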
diff --git a/clang/test/CodeGen/AArch64/neon-extract.c b/clang/test/CodeGen/AArch64/neon-extract.c
index e5699f813131f..75dba0d93406a 100644
--- a/clang/test/CodeGen/AArch64/neon-extract.c
+++ b/clang/test/CodeGen/AArch64/neon-extract.c
@@ -1,246 +1,329 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vext_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vext_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
return vext_s8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vext_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vext_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
return vext_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vext_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i32> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vext_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[VEXT]]
+//
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
return vext_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vext_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VEXT]]
+//
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
return vext_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vextq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vextq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
return vextq_s8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vextq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vextq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
return vextq_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vextq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-// CHECK: ret <4 x i32> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vextq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-NEXT: ret <4 x i32> [[VEXT]]
+//
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
return vextq_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vextq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[VEXT]]
+//
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
return vextq_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vext_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vext_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
return vext_u8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vext_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vext_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
return vext_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vext_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i32> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vext_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[VEXT]]
+//
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
return vext_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vext_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VEXT]]
+//
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
return vext_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vextq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vextq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
return vextq_u8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vextq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vextq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
return vextq_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vextq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-// CHECK: ret <4 x i32> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vextq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-NEXT: ret <4 x i32> [[VEXT]]
+//
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
return vextq_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vextq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[VEXT]]
+//
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
return vextq_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vext_f32(<2 x float> noundef %a, <2 x float> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x float> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vext_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x float> [[VEXT]]
+//
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
return vext_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vext_f64(<1 x double> noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x double> [[TMP2]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x double> [[VEXT]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vext_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x double> [[VEXT]]
+//
float64x1_t test_vext_f64(float64x1_t a, float64x1_t b) {
return vext_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vextq_f32(<4 x float> noundef %a, <4 x float> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-// CHECK: ret <4 x float> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vextq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-NEXT: ret <4 x float> [[VEXT]]
+//
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
return vextq_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vextq_f64(<2 x double> noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x double> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vextq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x double> [[VEXT]]
+//
float64x2_t test_vextq_f64(float64x2_t a, float64x2_t b) {
return vextq_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vext_p8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vext_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
return vext_p8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vext_p16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vext_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
return vext_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vextq_p8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vextq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
return vextq_p8(a, b, 2);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vextq_p16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vextq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
return vextq_p16(a, b, 3);
}
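
The float vext checks above differ from the integer ones: with sroa added to the opt pipeline, a float vector is first bitcast to a same-width integer vector, then to bytes, then back to float before the shufflevector (and for <1 x double> a scalar i64 insertelement round-trip is left behind instead). A hedged IR sketch of the <2 x float> case, with illustrative names:

; Sketch of the new float vext shape (names assumed, not from the tests).
define <2 x float> @vext_f32_shape(<2 x float> %a, <2 x float> %b) {
  ; extra hop: float -> same-width integer vector -> bytes ...
  %a.i32 = bitcast <2 x float> %a to <2 x i32>
  %b.i32 = bitcast <2 x float> %b to <2 x i32>
  %a.i8 = bitcast <2 x i32> %a.i32 to <8 x i8>
  %b.i8 = bitcast <2 x i32> %b.i32 to <8 x i8>
  ; ... and back to float, so the shuffle consumes the cast results
  %a.f = bitcast <8 x i8> %a.i8 to <2 x float>
  %b.f = bitcast <8 x i8> %b.i8 to <2 x float>
  %vext = shufflevector <2 x float> %a.f, <2 x float> %b.f, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %vext
}

As with the integer sketch, the casts are bit-identities that later optimisation is expected to collapse; only the overall shape mirrors the checks.
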
diff --git a/clang/test/CodeGen/AArch64/neon-fma.c b/clang/test/CodeGen/AArch64/neon-fma.c
index b87c531b8b231..06531ce0a372b 100644
--- a/clang/test/CodeGen/AArch64/neon-fma.c
+++ b/clang/test/CodeGen/AArch64/neon-fma.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -64,9 +64,10 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vmla_lane_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
@@ -78,9 +79,10 @@ float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_lane_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
@@ -92,9 +94,10 @@ float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmla_laneq_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
@@ -106,9 +109,10 @@ float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_laneq_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
@@ -120,9 +124,10 @@ float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
// CHECK-LABEL: define {{[^@]+}}@test_vmls_lane_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
@@ -134,9 +139,10 @@ float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_lane_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
@@ -148,9 +154,10 @@ float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmls_laneq_f32_0
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
@@ -162,9 +169,10 @@ float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_laneq_f32_0
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> zeroinitializer
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
@@ -176,9 +184,10 @@ float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
// CHECK-LABEL: define {{[^@]+}}@test_vmla_lane_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
@@ -190,9 +199,10 @@ float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_lane_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
@@ -204,9 +214,10 @@ float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmla_laneq_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[ADD]]
@@ -218,9 +229,10 @@ float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlaq_laneq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[ADD]]
@@ -232,9 +244,10 @@ float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmls_lane_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
@@ -246,9 +259,10 @@ float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_lane_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
@@ -259,9 +273,10 @@ float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmls_laneq_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <2 x float> [[SUB]]
@@ -273,9 +288,10 @@ float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
// CHECK-LABEL: define {{[^@]+}}@test_vmlsq_laneq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
// CHECK-NEXT: ret <4 x float> [[SUB]]
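All of the hunks above share one shape: where the old IR round-tripped the lane vector straight through <8 x i8>/<16 x i8>, the new IR first bitcasts to the equally sized integer vector (<2 x i32> or <4 x i32>) and only then to bytes, so every float operand now carries an explicit integer-typed step. Source-level behaviour is unchanged; a minimal usage sketch (wrapper name hypothetical, intrinsic as in the tests):

    #include <arm_neon.h>

    // acc + x * v[1] -- the pattern test_vmla_lane_f32 checks above.
    // The extra bitcasts are value-preserving reinterprets and fold
    // away as soon as the optimizer runs.
    float32x2_t mla_lane1(float32x2_t acc, float32x2_t x, float32x2_t v) {
      return vmla_lane_f32(acc, x, v, 1);
    }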
@@ -289,11 +305,17 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C]], i32 1
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[VECINIT1_I]], <2 x double> [[A]])
-// CHECK-NEXT: ret <2 x double> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
return vfmaq_n_f64(a, b, c);
@@ -305,11 +327,17 @@ float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[B]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C]], i32 1
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FNEG_I]], <2 x double> [[VECINIT1_I]], <2 x double> [[A]])
-// CHECK-NEXT: ret <2 x double> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
//
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
return vfmsq_n_f64(a, b, c);
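For the _n forms the scalar is splatted first (VECINIT/VECINIT1_I), and then all three fma operands pass through <2 x i64> before <16 x i8>; the @llvm.fma.v2f64 call now consumes the round-tripped values (TMP6..TMP8), which is why the result temporary moves from TMP3 to TMP9. A minimal sketch (wrapper name hypothetical):

    #include <arm_neon.h>

    // acc + x * {s, s}; lowers to @llvm.fma.v2f64 as checked above.
    // vfmsq_n_f64 is the same with x negated first (the FNEG_I line).
    float64x2_t fma_splat(float64x2_t acc, float64x2_t x, double s) {
      return vfmaq_n_f64(acc, x, s);
    }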
diff --git a/clang/test/CodeGen/AArch64/neon-fp16fml.c b/clang/test/CodeGen/AArch64/neon-fp16fml.c
index 976045d6e79f3..0f69dbaa0f4d6 100644
--- a/clang/test/CodeGen/AArch64/neon-fp16fml.c
+++ b/clang/test/CodeGen/AArch64/neon-fp16fml.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +v8.2a -target-feature +neon -target-feature +fp16fml \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target
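The pipeline change (adding sroa) matches the hunks below: the lane tests in this file used to reinterpret through __REINT stack slots (store a half vector, load it back as an i16 vector), and mem2reg cannot promote an alloca whose store and load types differ, while sroa rewrites it into the single bitcast the updated CHECK lines expect. A hedged sketch of what that old pattern amounts to (helper name hypothetical):

    #include <arm_neon.h>
    #include <string.h>

    // Type-punning through memory: at -O0 this is an alloca plus a
    // store and a differently typed load, i.e. the __REINT pattern.
    // sroa turns it into one bitcast, with no change in value.
    static inline int16x4_t as_i16x4(float16x4_t v) {
      int16x4_t r;
      memcpy(&r, &v, sizeof r);
      return r;
    }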
@@ -12,10 +12,16 @@
// CHECK-LABEL: @test_vfmlal_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[VFMLAL_LOW_I]], <4 x half> [[VFMLAL_LOW1_I]], <4 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]]
//
float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
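Same story for the fmlal/fmlsl builtins: each operand of the @llvm.aarch64.neon.fmlal*/fmlsl* call is now the product of a double reinterpret (float or half vector -> same-width integer vector -> byte vector -> back), and the call reads the renamed VFMLAL_*_I/VFMLSL_*_I results rather than the raw arguments. Usage sketch (wrapper name hypothetical; requires +fp16fml as in the RUN line):

    #include <arm_neon.h>

    // Widening fp16 multiply-accumulate of the low halves, matching
    // test_vfmlal_low_f16 above: acc[i] += widen(x[i]) * widen(y[i]).
    float32x2_t widen_mla(float32x2_t acc, float16x4_t x, float16x4_t y) {
      return vfmlal_low_f16(acc, x, y);
    }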
@@ -24,10 +30,16 @@ float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: @test_vfmlsl_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[VFMLSL_LOW_I]], <4 x half> [[VFMLSL_LOW1_I]], <4 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]]
//
float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -36,10 +48,16 @@ float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: @test_vfmlal_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[VFMLAL_HIGH_I]], <4 x half> [[VFMLAL_HIGH1_I]], <4 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]]
//
float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -48,10 +66,16 @@ float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: @test_vfmlsl_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[VFMLSL_HIGH_I]], <4 x half> [[VFMLSL_HIGH1_I]], <4 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_HIGH3_I]]
//
float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -60,10 +84,16 @@ float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: @test_vfmlalq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[VFMLAL_LOW_I]], <8 x half> [[VFMLAL_LOW1_I]], <8 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]]
//
float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -72,10 +102,16 @@ float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: @test_vfmlslq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[VFMLSL_LOW_I]], <8 x half> [[VFMLSL_LOW1_I]], <8 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]]
//
float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -84,10 +120,16 @@ float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: @test_vfmlalq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[VFMLAL_HIGH_I]], <8 x half> [[VFMLAL_HIGH1_I]], <8 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]]
//
float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -96,10 +138,16 @@ float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: @test_vfmlslq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[VFMLSL_HIGH_I]], <8 x half> [[VFMLSL_HIGH1_I]], <8 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]]
//
float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -110,42 +158,32 @@ float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: @test_vfmlal_lane_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[VFMLAL_LOW_I]], <4 x half> [[VFMLAL_LOW1_I]], <4 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]]
//
float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
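With the __REINT allocas gone, the lane splat is still built one element at a time: bitcast <4 x half> to <4 x i16>, extractelement the chosen lane, bitcast the i16 back to half, insertelement -- repeated for each destination lane. In intrinsic terms that per-element dance is just a lane read plus a broadcast (sketch, helper name hypothetical, requires +fp16):

    #include <arm_neon.h>

    // What test_vfmlal_lane_low_f16 builds before the fmlal call:
    // lane 0 of c replicated into all four half lanes.
    static inline float16x4_t splat_lane0(float16x4_t c) {
      float16_t s = vget_lane_f16(c, 0);  // extract + reinterpret
      return vdup_n_f16(s);               // broadcast to every lane
    }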
@@ -154,42 +192,32 @@ float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c
// CHECK-LABEL: @test_vfmlal_lane_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[VFMLAL_HIGH_I]], <4 x half> [[VFMLAL_HIGH1_I]], <4 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]]
//
float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -198,74 +226,48 @@ float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t
// CHECK-LABEL: @test_vfmlalq_lane_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE39:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGET_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE49:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGET_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE59:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGET_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE69:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGET_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[VFMLAL_LOW_I]], <8 x half> [[VFMLAL_LOW1_I]], <8 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]]
//
float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
@@ -274,74 +276,48 @@ float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t
// CHECK-LABEL: @test_vfmlalq_lane_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE39:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGET_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE49:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGET_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE59:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGET_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE69:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGET_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[VFMLAL_HIGH_I]], <8 x half> [[VFMLAL_HIGH1_I]], <8 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]]
//
float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
@@ -350,42 +326,32 @@ float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t
// CHECK-LABEL: @test_vfmlal_laneq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[VFMLAL_LOW_I]], <4 x half> [[VFMLAL_LOW1_I]], <4 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]]
//
float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
@@ -394,42 +360,32 @@ float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t
// CHECK-LABEL: @test_vfmlal_laneq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[VFMLAL_HIGH_I]], <4 x half> [[VFMLAL_HIGH1_I]], <4 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]]
//
float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
@@ -438,74 +394,48 @@ float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t
// CHECK-LABEL: @test_vfmlalq_laneq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE39:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGETQ_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE49:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGETQ_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE59:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGETQ_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE69:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGETQ_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[VFMLAL_LOW_I]], <8 x half> [[VFMLAL_LOW1_I]], <8 x half> [[VFMLAL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]]
//
float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -514,74 +444,48 @@ float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t
// CHECK-LABEL: @test_vfmlalq_laneq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE39:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGETQ_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE49:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGETQ_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE59:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGETQ_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE69:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGETQ_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLAL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLAL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[VFMLAL_HIGH_I]], <8 x half> [[VFMLAL_HIGH1_I]], <8 x half> [[VFMLAL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]]
//
float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -590,42 +494,32 @@ float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_
// CHECK-LABEL: @test_vfmlsl_lane_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[VFMLSL_LOW_I]], <4 x half> [[VFMLSL_LOW1_I]], <4 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]]
//
float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -634,42 +528,32 @@ float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c
// CHECK-LABEL: @test_vfmlsl_lane_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[VFMLSL_HIGH_I]], <4 x half> [[VFMLSL_HIGH1_I]], <4 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_HIGH3_I]]
//
float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
@@ -678,74 +562,48 @@ float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t
// CHECK-LABEL: @test_vfmlslq_lane_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
-// CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE39:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGET_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE49:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGET_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE59:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGET_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE69:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGET_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[VFMLSL_LOW_I]], <8 x half> [[VFMLSL_LOW1_I]], <8 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]]
//
float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
@@ -754,74 +612,48 @@ float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t
// CHECK-LABEL: @test_vfmlslq_lane_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[C:%.*]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8
-// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8
-// CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8
-// CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8
-// CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8
-// CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE19:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGET_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE29:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGET_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE39:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGET_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE49:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGET_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE59:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGET_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE69:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGET_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[VFMLSL_HIGH_I]], <8 x half> [[VFMLSL_HIGH1_I]], <8 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]]
//
float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
@@ -830,42 +662,32 @@ float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t
// CHECK-LABEL: @test_vfmlsl_laneq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[VFMLSL_LOW_I]], <4 x half> [[VFMLSL_LOW1_I]], <4 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]]
//
float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
@@ -874,42 +696,32 @@ float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t
// CHECK-LABEL: @test_vfmlsl_laneq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <4 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <4 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT32]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <8 x i8> [[TMP13]] to <4 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[VFMLSL_HIGH_I]], <4 x half> [[VFMLSL_HIGH1_I]], <4 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <2 x float> [[VFMLSL_HIGH3_I]]
//
float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
@@ -918,74 +730,48 @@ float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t
// CHECK-LABEL: @test_vfmlslq_laneq_low_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
-// CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE39:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGETQ_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE49:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGETQ_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE59:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGETQ_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE69:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGETQ_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_LOW_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_LOW1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[VFMLSL_LOW_I]], <8 x half> [[VFMLSL_LOW1_I]], <8 x half> [[VFMLSL_LOW2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]]
//
float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
@@ -994,74 +780,48 @@ float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t
// CHECK-LABEL: @test_vfmlslq_laneq_high_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2
-// CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[C:%.*]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2
-// CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16
-// CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2
-// CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16
-// CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2
-// CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16
-// CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2
-// CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16
-// CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2
-// CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16
-// CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2
-// CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6
-// CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16
-// CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2
-// CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7
-// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8>
-// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE9:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE9]] to half
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE19:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[VGETQ_LANE19]] to half
+// CHECK-NEXT: [[VECINIT22:%.*]] = insertelement <8 x half> [[VECINIT12]], half [[TMP5]], i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE29:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[VGETQ_LANE29]] to half
+// CHECK-NEXT: [[VECINIT32:%.*]] = insertelement <8 x half> [[VECINIT22]], half [[TMP7]], i32 3
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE39:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[VGETQ_LANE39]] to half
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x half> [[VECINIT32]], half [[TMP9]], i32 4
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE49:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[VGETQ_LANE49]] to half
+// CHECK-NEXT: [[VECINIT52:%.*]] = insertelement <8 x half> [[VECINIT42]], half [[TMP11]], i32 5
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE59:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[VGETQ_LANE59]] to half
+// CHECK-NEXT: [[VECINIT62:%.*]] = insertelement <8 x half> [[VECINIT52]], half [[TMP13]], i32 6
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[VGETQ_LANE69:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[VGETQ_LANE69]] to half
+// CHECK-NEXT: [[VECINIT72:%.*]] = insertelement <8 x half> [[VECINIT62]], half [[TMP15]], i32 7
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT72]] to <8 x i16>
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP16]] to <16 x i8>
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP17]] to <16 x i8>
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP18]] to <16 x i8>
+// CHECK-NEXT: [[VFMLSL_HIGH_I:%.*]] = bitcast <16 x i8> [[TMP19]] to <4 x float>
+// CHECK-NEXT: [[VFMLSL_HIGH1_I:%.*]] = bitcast <16 x i8> [[TMP20]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH2_I:%.*]] = bitcast <16 x i8> [[TMP21]] to <8 x half>
+// CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[VFMLSL_HIGH_I]], <8 x half> [[VFMLSL_HIGH1_I]], <8 x half> [[VFMLSL_HIGH2_I]])
// CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]]
//
float32x4_t test_vfmlslq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
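The fmlsl hunks above all follow the same shape: the old checks expected each f16 lane to be reinterpreted through a stack slot (alloca, store, load), while the new checks expect plain bitcast chains, so no memory round-trip survives the mem2reg,sroa cleanup. As a rough, hedged illustration of what these tests exercise at the C level (the lane index 4 below is only an example, not taken from the patch), a caller looks like:

  // Sketch only; assumes a target with the fp16fml feature enabled.
  // The lane argument must be a constant expression, as in the
  // autogenerated tests above.
  #include <arm_neon.h>

  float32x2_t fmlsl_low_lane(float32x2_t acc, float16x4_t b, float16x8_t c) {
    // acc - widen(b) * widen(dup(c[4])), low half, computed in f32.
    return vfmlsl_laneq_low_f16(acc, b, c, 4);
  }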
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c
index 15ae7eea820e8..ba32cfb7f3bae 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics-constrained.c
@@ -1,12 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone \
-// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED %s
+// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefixes=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone \
// RUN: -ffp-exception-behavior=strict \
-// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,CONSTRAINED %s
+// RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefixes=CONSTRAINED %s
// REQUIRES: aarch64-registered-target
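The RUN-line update above drives the rest of this file's churn: adding sroa to the opt pipeline strips the alloca/store/load reinterpret sequences from the IR under test, and the hand-written COMMON/COMMONIR prefixes give way to fully spelled-out UNCONSTRAINED and CONSTRAINED checks from update_cc_test_checks.py. As a hedged sketch of the pair of behaviours the two prefixes cover (not part of the patch), the same source lowers two ways:

  // Sketch only: compiled normally this becomes a call to
  // @llvm.fma.v2f32; with -ffp-exception-behavior=strict it becomes
  // @llvm.experimental.constrained.fma.v2f32, matching the two
  // check prefixes in the hunks below.
  #include <arm_neon.h>

  float32x2_t fma_demo(float32x2_t acc, float32x2_t x, float32x2_t y) {
    return vfma_f32(acc, x, y);  // acc + x * y, elementwise
  }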
@@ -14,804 +15,1759 @@
#include <arm_neon.h>
-// COMMON-LABEL: test_vadd_f32
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
-// CONSTRAINED: [[ADD_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vadd_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0:[0-9]+]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vadd_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0:[0-9]+]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3:[0-9]+]]
+// CONSTRAINED-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
return vadd_f32(v1, v2);
}
-// COMMON-LABEL: test_vaddq_f32
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
-// CONSTRAINED: [[ADD_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vaddq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vaddq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
return vaddq_f32(v1, v2);
}
-// COMMON-LABEL: test_vsub_f32
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
-// CONSTRAINED: [[SUB_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vsub_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vsub_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
return vsub_f32(v1, v2);
}
-// COMMON-LABEL: test_vsubq_f32
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
-// CONSTRAINED: [[SUB_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vsubq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vsubq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
return vsubq_f32(v1, v2);
}
-// COMMON-LABEL: test_vsubq_f64
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
-// CONSTRAINED: [[SUB_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vsubq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x double> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vsubq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[SUB_I]]
+//
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
return vsubq_f64(v1, v2);
}
-// COMMON-LABEL: test_vmul_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[MUL_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vmul_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[MUL_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vmul_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[MUL_I]]
+//
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
return vmul_f32(v1, v2);
}
-// COMMON-LABEL: test_vmulq_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
-// CONSTRAINED: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[MUL_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vmulq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[MUL_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vmulq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[MUL_I]]
+//
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
return vmulq_f32(v1, v2);
}
-// COMMON-LABEL: test_vmulq_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[MUL_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vmulq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x double> [[MUL_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vmulq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[MUL_I]]
+//
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
return vmulq_f64(v1, v2);
}
-// COMMON-LABEL: test_vmla_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %v2, <2 x float> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
-// CONSTRAINED: [[ADD_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %v1, <2 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vmla_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vmla_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> [[V2]], <2 x float> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[V1]], <2 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmla_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vmlaq_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %v2, <4 x float> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
-// CONSTRAINED: [[ADD_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %v1, <4 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vmlaq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vmlaq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> [[V2]], <4 x float> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> [[V1]], <4 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlaq_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vmlaq_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %v2, <2 x double> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
-// CONSTRAINED: [[ADD_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %v1, <2 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vmlaq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <2 x double> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vmlaq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[V2]], <2 x double> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> [[V1]], <2 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[ADD_I]]
+//
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlaq_f64(v1, v2, v3);
}
-// COMMON-LABEL: test_vmls_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %v2, <2 x float> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
-// CONSTRAINED: [[SUB_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %v1, <2 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vmls_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vmls_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> [[V2]], <2 x float> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> [[V1]], <2 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmls_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vmlsq_f32
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %v2, <4 x float> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
-// CONSTRAINED: [[SUB_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %v1, <4 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vmlsq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vmlsq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> [[V2]], <4 x float> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> [[V1]], <4 x float> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlsq_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vmlsq_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
-// CONSTRAINED: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %v2, <2 x double> %v3, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
-// CONSTRAINED: [[SUB_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %v1, <2 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vmlsq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V2]], [[V3]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[V1]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <2 x double> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vmlsq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[V2]], <2 x double> [[V3]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> [[V1]], <2 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[SUB_I]]
+//
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlsq_f64(v1, v2, v3);
}
-// COMMON-LABEL: test_vfma_f32
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vfma_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <2 x float> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vfma_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfma_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vfmaq_f32
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vfmaq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x float> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vfmaq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmaq_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vfmaq_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vfmaq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vfmaq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP9]]
+//
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmaq_f64(v1, v2, v3);
}
-// COMMON-LABEL: test_vfms_f32
-// COMMONIR: [[SUB_I:%.*]] = fneg <2 x float> %v2
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vfms_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[V2]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <2 x float> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vfms_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[V2]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfms_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vfmsq_f32
-// COMMONIR: [[SUB_I:%.*]] = fneg <4 x float> %v2
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vfmsq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[V2]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x float> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vfmsq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[V2]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmsq_f32(v1, v2, v3);
}
-// COMMON-LABEL: test_vfmsq_f64
-// COMMONIR: [[SUB_I:%.*]] = fneg <2 x double> %v2
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
-// CONSTRAINED: [[TMP3:%.*]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vfmsq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[V2]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vfmsq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[V2]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP9]]
+//
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmsq_f64(v1, v2, v3);
}
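// A minimal reference sketch for the vfma/vfms blocks above (the helper name
// is hypothetical and not part of the checked output): vfms negates the
// second operand and fuses, so per lane vfms(v1, v2, v3) == fma(-v2, v3, v1).
// The float -> iN -> i8 -> float bitcast chain in the new checks only
// reinterprets bits and does not change any lane value.
static inline float32x2_t vfms_f32_ref(float32x2_t v1, float32x2_t v2,
                                       float32x2_t v3) {
  // vfma_f32(a, b, c) computes a + b*c per lane, so negating b gives a - b*c.
  return vfma_f32(v1, vneg_f32(v2), v3);
}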
-// COMMON-LABEL: test_vdivq_f64
-// UNCONSTRAINED: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
-// CONSTRAINED: [[DIV_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x double> [[DIV_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vdivq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x double> [[DIV_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vdivq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[DIV_I:%.*]] = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x double> [[DIV_I]]
+//
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
return vdivq_f64(v1, v2);
}
-// COMMON-LABEL: test_vdivq_f32
-// UNCONSTRAINED: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
-// CONSTRAINED: [[DIV_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <4 x float> [[DIV_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vdivq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <4 x float> [[DIV_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vdivq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[DIV_I:%.*]] = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <4 x float> [[DIV_I]]
+//
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
return vdivq_f32(v1, v2);
}
-// COMMON-LABEL: test_vdiv_f32
-// UNCONSTRAINED: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
-// CONSTRAINED: [[DIV_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fdiv.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <2 x float> [[DIV_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x float> @test_vdiv_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[DIV_I:%.*]] = fdiv <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: ret <2 x float> [[DIV_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x float> @test_vdiv_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[DIV_I:%.*]] = call <2 x float> @llvm.experimental.constrained.fdiv.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <2 x float> [[DIV_I]]
+//
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
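// A hypothetical per-lane reference for the vdiv tests above, assuming only
// arm_neon.h: the unconstrained build emits a plain fdiv, while the strict
// build routes through @llvm.experimental.constrained.fdiv so the rounding
// mode and FP exceptions stay observable; both compute the same quotients.
static inline float32x2_t vdiv_f32_ref(float32x2_t v1, float32x2_t v2) {
  float32x2_t r = {vget_lane_f32(v1, 0) / vget_lane_f32(v2, 0),
                   vget_lane_f32(v1, 1) / vget_lane_f32(v2, 1)};
  return r;
}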
-// COMMON-LABEL: test_vceq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// COMMONIR: ret <2 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i32> @test_vceq_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i32> @test_vceq_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
return vceq_f32(v1, v2);
}
-// COMMON-LABEL: test_vceq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
-// CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double> %a, <1 x double> %b, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// COMMONIR: ret <1 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vceq_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oeq <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vceq_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmp.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
return vceq_f64(a, b);
}
-// COMMON-LABEL: test_vceqq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// COMMONIR: ret <4 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x i32> @test_vceqq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x i32> @test_vceqq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
return vceqq_f32(v1, v2);
}
-// COMMON-LABEL: test_vceqq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// COMMONIR: ret <2 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i64> @test_vceqq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i64> @test_vceqq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
return vceqq_f64(v1, v2);
}
-// COMMON-LABEL: test_vcge_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// COMMONIR: ret <2 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcge_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcge_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
return vcge_f32(v1, v2);
}
-// COMMON-LABEL: test_vcge_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
-// CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// COMMONIR: ret <1 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcge_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oge <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcge_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
return vcge_f64(a, b);
}
-// COMMON-LABEL: test_vcgeq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// COMMONIR: ret <4 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcgeq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcgeq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
return vcgeq_f32(v1, v2);
}
-// COMMON-LABEL: test_vcgeq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// COMMONIR: ret <2 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcgeq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcgeq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
return vcgeq_f64(v1, v2);
}
-// COMMON-LABEL: test_vcle_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// COMMONIR: ret <2 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcle_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcle_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
return vcle_f32(v1, v2);
}
-// COMMON-LABEL: test_vcle_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
-// CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// COMMONIR: ret <1 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcle_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ole <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcle_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
return vcle_f64(a, b);
}
-// COMMON-LABEL: test_vcleq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// COMMONIR: ret <4 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcleq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcleq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
return vcleq_f32(v1, v2);
}
-// COMMON-LABEL: test_vcleq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// COMMONIR: ret <2 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcleq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcleq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
return vcleq_f64(v1, v2);
}
-// COMMON-LABEL: test_vcgt_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// COMMONIR: ret <2 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcgt_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i32> @test_vcgt_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
return vcgt_f32(v1, v2);
}
-// COMMON-LABEL: test_vcgt_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
-// CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// COMMONIR: ret <1 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcgt_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ogt <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcgt_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
return vcgt_f64(a, b);
}
-// COMMON-LABEL: test_vcgtq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// COMMONIR: ret <4 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcgtq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcgtq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
return vcgtq_f32(v1, v2);
}
-// COMMON-LABEL: test_vcgtq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// COMMONIR: ret <2 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcgtq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcgtq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
return vcgtq_f64(v1, v2);
}
-// COMMON-LABEL: test_vclt_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// COMMONIR: ret <2 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i32> @test_vclt_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// UNCONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i32> @test_vclt_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> [[V1]], <2 x float> [[V2]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CONSTRAINED-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
return vclt_f32(v1, v2);
}
-// COMMON-LABEL: test_vclt_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
-// CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// COMMONIR: ret <1 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vclt_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp olt <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vclt_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CONSTRAINED-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
return vclt_f64(a, b);
}
-// COMMON-LABEL: test_vcltq_f32
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// COMMONIR: ret <4 x i32> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcltq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// UNCONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x i32> @test_vcltq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> [[V1]], <4 x float> [[V2]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CONSTRAINED-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
return vcltq_f32(v1, v2);
}
-// COMMON-LABEL: test_vcltq_f64
-// UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
-// CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// COMMONIR: ret <2 x i64> [[SEXT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcltq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[V1]], [[V2]]
+// UNCONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// UNCONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x i64> @test_vcltq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> [[V1]], <2 x double> [[V2]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CONSTRAINED-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
return vcltq_f64(v1, v2);
}
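// The checks above encode a real semantic split: equality (vceq*) lowers to
// the quiet @llvm.experimental.constrained.fcmp, while the ordering
// predicates (vcge/vcgt/vcle/vclt) lower to the signaling
// @llvm.experimental.constrained.fcmps, which raises the Invalid exception on
// quiet NaN operands as IEEE 754 requires for <, <=, >, >=. A hypothetical
// scalar sketch of the sign-extended mask these intrinsics return per lane:
static inline uint32_t vcge_lane_ref(float a, float b) {
  return (a >= b) ? 0xFFFFFFFFu : 0u; // sext i1 to i32 yields -1 or 0
}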
-// COMMON-LABEL: test_vpadds_f32
-// COMMONIR: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
-// COMMONIR: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
-// UNCONSTRAINED: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
-// CONSTRAINED: [[VPADDD_I:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LANE0_I]], float [[LANE1_I]], metadata !"round.tonearest", metadata !"fpexcept.strict"
-// COMMONIR: ret float [[VPADDD_I]]
+// UNCONSTRAINED-LABEL: define dso_local float @test_vpadds_f32(
+// UNCONSTRAINED-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[LANE0_I:%.*]] = extractelement <2 x float> [[A]], i64 0
+// UNCONSTRAINED-NEXT: [[LANE1_I:%.*]] = extractelement <2 x float> [[A]], i64 1
+// UNCONSTRAINED-NEXT: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
+// UNCONSTRAINED-NEXT: ret float [[VPADDD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local float @test_vpadds_f32(
+// CONSTRAINED-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[LANE0_I:%.*]] = extractelement <2 x float> [[A]], i64 0
+// CONSTRAINED-NEXT: [[LANE1_I:%.*]] = extractelement <2 x float> [[A]], i64 1
+// CONSTRAINED-NEXT: [[VPADDD_I:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LANE0_I]], float [[LANE1_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret float [[VPADDD_I]]
+//
float32_t test_vpadds_f32(float32x2_t a) {
return vpadds_f32(a);
}
-// COMMON-LABEL: test_vpaddd_f64
-// COMMONIR: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
-// COMMONIR: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
-// UNCONSTRAINED: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
-// CONSTRAINED: [[VPADDD_I:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LANE0_I]], double [[LANE1_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret double [[VPADDD_I]]
+// UNCONSTRAINED-LABEL: define dso_local double @test_vpaddd_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[LANE0_I:%.*]] = extractelement <2 x double> [[A]], i64 0
+// UNCONSTRAINED-NEXT: [[LANE1_I:%.*]] = extractelement <2 x double> [[A]], i64 1
+// UNCONSTRAINED-NEXT: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
+// UNCONSTRAINED-NEXT: ret double [[VPADDD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local double @test_vpaddd_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[LANE0_I:%.*]] = extractelement <2 x double> [[A]], i64 0
+// CONSTRAINED-NEXT: [[LANE1_I:%.*]] = extractelement <2 x double> [[A]], i64 1
+// CONSTRAINED-NEXT: [[VPADDD_I:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[LANE0_I]], double [[LANE1_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret double [[VPADDD_I]]
+//
float64_t test_vpaddd_f64(float64x2_t a) {
return vpaddd_f64(a);
}
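// A hypothetical reference for the pairwise adds above: both lanes are
// extracted and summed; under strict FP the sum goes through
// @llvm.experimental.constrained.fadd with round.tonearest/fpexcept.strict
// metadata instead of a bare fadd.
static inline float32_t vpadds_f32_ref(float32x2_t a) {
  return vget_lane_f32(a, 0) + vget_lane_f32(a, 1);
}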
-// COMMON-LABEL: test_vcvts_f32_s32
-// UNCONSTRAINED: [[TMP0:%.*]] = sitofp i32 %a to float
-// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret float [[TMP0]]
+// UNCONSTRAINED-LABEL: define dso_local float @test_vcvts_f32_s32(
+// UNCONSTRAINED-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = sitofp i32 [[A]] to float
+// UNCONSTRAINED-NEXT: ret float [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local float @test_vcvts_f32_s32(
+// CONSTRAINED-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret float [[TMP0]]
+//
float32_t test_vcvts_f32_s32(int32_t a) {
return vcvts_f32_s32(a);
}
-// COMMON-LABEL: test_vcvtd_f64_s64
-// UNCONSTRAINED: [[TMP0:%.*]] = sitofp i64 %a to double
-// CONSTRAINED: [[TMP0:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret double [[TMP0]]
+// UNCONSTRAINED-LABEL: define dso_local double @test_vcvtd_f64_s64(
+// UNCONSTRAINED-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = sitofp i64 [[A]] to double
+// UNCONSTRAINED-NEXT: ret double [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local double @test_vcvtd_f64_s64(
+// CONSTRAINED-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret double [[TMP0]]
+//
float64_t test_vcvtd_f64_s64(int64_t a) {
return vcvtd_f64_s64(a);
}
-// COMMON-LABEL: test_vcvts_f32_u32
-// UNCONSTRAINED: [[TMP0:%.*]] = uitofp i32 %a to float
-// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret float [[TMP0]]
+// UNCONSTRAINED-LABEL: define dso_local float @test_vcvts_f32_u32(
+// UNCONSTRAINED-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = uitofp i32 [[A]] to float
+// UNCONSTRAINED-NEXT: ret float [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local float @test_vcvts_f32_u32(
+// CONSTRAINED-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret float [[TMP0]]
+//
float32_t test_vcvts_f32_u32(uint32_t a) {
return vcvts_f32_u32(a);
}
// XXX: should also verify which register class these conversions use
-// COMMON-LABEL: test_vcvtd_f64_u64
-// UNCONSTRAINED: [[TMP0:%.*]] = uitofp i64 %a to double
-// CONSTRAINED: [[TMP0:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret double [[TMP0]]
+// UNCONSTRAINED-LABEL: define dso_local double @test_vcvtd_f64_u64(
+// UNCONSTRAINED-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = uitofp i64 [[A]] to double
+// UNCONSTRAINED-NEXT: ret double [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local double @test_vcvtd_f64_u64(
+// CONSTRAINED-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret double [[TMP0]]
+//
float64_t test_vcvtd_f64_u64(uint64_t a) {
return vcvtd_f64_u64(a);
}
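// A hypothetical sketch for the conversions above: signed sources lower to
// sitofp and unsigned ones to uitofp (or their constrained counterparts).
// The signedness only matters once the top bit of the source is set:
static inline void cvt_ref(void) {
  float s = (float)(int32_t)0x80000000u;  // sitofp view: -2147483648.0f
  float u = (float)(uint32_t)0x80000000u; // uitofp view:  2147483648.0f
  (void)s;
  (void)u;
}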
-// COMMON-LABEL: test_vceqs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oeq float %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vceqs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oeq float [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vceqs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float [[A]], float [[B]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
return (uint32_t)vceqs_f32(a, b);
}
-// COMMON-LABEL: test_vceqd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oeq double %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vceqd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oeq double [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vceqd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
return (uint64_t)vceqd_f64(a, b);
}
-// COMMON-LABEL: test_vceqzs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCEQZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vceqzs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oeq float [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCEQZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vceqzs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float [[A]], float 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCEQZ_I]]
+//
uint32_t test_vceqzs_f32(float32_t a) {
return (uint32_t)vceqzs_f32(a);
}
-// COMMON-LABEL: test_vceqzd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict")
-// COMMONIR: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCEQZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vceqzd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oeq double [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCEQZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vceqzd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A]], double 0.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCEQZ_I]]
+//
uint64_t test_vceqzd_f64(float64_t a) {
return (uint64_t)vceqzd_f64(a);
}
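// A hypothetical reference for the scalar compares above: the i1 compare
// result is sign-extended, so a true comparison returns an all-ones mask
// (-1), not 1, matching the sext i1 lines in the checks.
static inline uint32_t vceqs_f32_ref(float32_t a, float32_t b) {
  return (a == b) ? 0xFFFFFFFFu : 0u;
}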
-// COMMON-LABEL: test_vcges_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge float %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcges_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oge float [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcges_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float [[B]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcges_f32(float32_t a, float32_t b) {
return (uint32_t)vcges_f32(a, b);
}
-// COMMON-LABEL: test_vcged_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge double %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcged_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oge double [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcged_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcged_f64(float64_t a, float64_t b) {
return (uint64_t)vcged_f64(a, b);
}
-// COMMON-LABEL: test_vcgezs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCGEZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcgezs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oge float [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCGEZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcgezs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCGEZ_I]]
+//
uint32_t test_vcgezs_f32(float32_t a) {
return (uint32_t)vcgezs_f32(a);
}
-// COMMON-LABEL: test_vcgezd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict")
-// COMMONIR: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCGEZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcgezd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp oge double [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCGEZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcgezd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCGEZ_I]]
+//
uint64_t test_vcgezd_f64(float64_t a) {
return (uint64_t)vcgezd_f64(a);
}
-// COMMON-LABEL: test_vcgts_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt float %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcgts_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ogt float [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcgts_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float [[B]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
return (uint32_t)vcgts_f32(a, b);
}
-// COMMON-LABEL: test_vcgtd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt double %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcgtd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ogt double [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcgtd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
return (uint64_t)vcgtd_f64(a, b);
}
-// COMMON-LABEL: test_vcgtzs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCGTZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcgtzs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ogt float [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCGTZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcgtzs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCGTZ_I]]
+//
uint32_t test_vcgtzs_f32(float32_t a) {
return (uint32_t)vcgtzs_f32(a);
}
-// COMMON-LABEL: test_vcgtzd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCGTZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcgtzd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ogt double [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCGTZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcgtzd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCGTZ_I]]
+//
uint64_t test_vcgtzd_f64(float64_t a) {
return (uint64_t)vcgtzd_f64(a);
}
-// COMMON-LABEL: test_vcles_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole float %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcles_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ole float [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcles_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float [[B]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcles_f32(float32_t a, float32_t b) {
return (uint32_t)vcles_f32(a, b);
}
-// COMMON-LABEL: test_vcled_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole double %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcled_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ole double [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcled_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcled_f64(float64_t a, float64_t b) {
return (uint64_t)vcled_f64(a, b);
}
-// COMMON-LABEL: test_vclezs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCLEZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vclezs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ole float [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCLEZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vclezs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCLEZ_I]]
+//
uint32_t test_vclezs_f32(float32_t a) {
return (uint32_t)vclezs_f32(a);
}
-// COMMON-LABEL: test_vclezd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict")
-// COMMONIR: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCLEZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vclezd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp ole double [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCLEZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vclezd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCLEZ_I]]
+//
uint64_t test_vclezd_f64(float64_t a) {
return (uint64_t)vclezd_f64(a);
}
-// COMMON-LABEL: test_vclts_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt float %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vclts_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp olt float [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vclts_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float [[B]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vclts_f32(float32_t a, float32_t b) {
return (uint32_t)vclts_f32(a, b);
}
-// COMMON-LABEL: test_vcltd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt double %a, %b
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCMPD_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcltd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp olt double [[A]], [[B]]
+// UNCONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcltd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
return (uint64_t)vcltd_f64(a, b);
}
-// COMMON-LABEL: test_vcltzs_f32
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// COMMONIR: ret i32 [[VCLTZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i32 @test_vcltzs_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp olt float [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// UNCONSTRAINED-NEXT: ret i32 [[VCLTZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i32 @test_vcltzs_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float [[A]], float 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CONSTRAINED-NEXT: ret i32 [[VCLTZ_I]]
+//
uint32_t test_vcltzs_f32(float32_t a) {
return (uint32_t)vcltzs_f32(a);
}
-// COMMON-LABEL: test_vcltzd_f64
-// UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
-// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict")
-// COMMONIR: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// COMMONIR: ret i64 [[VCLTZ_I]]
+// UNCONSTRAINED-LABEL: define dso_local i64 @test_vcltzd_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fcmp olt double [[A]], 0.000000e+00
+// UNCONSTRAINED-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// UNCONSTRAINED-NEXT: ret i64 [[VCLTZ_I]]
+//
+// CONSTRAINED-LABEL: define dso_local i64 @test_vcltzd_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CONSTRAINED-NEXT: ret i64 [[VCLTZ_I]]
+//
uint64_t test_vcltzd_f64(float64_t a) {
return (uint64_t)vcltzd_f64(a);
}
-// COMMON-LABEL: test_vadd_f64
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
-// CONSTRAINED: [[ADD_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> %a, <1 x double> %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vadd_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vadd_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[ADD_I]]
+//
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
return vadd_f64(a, b);
}
-// COMMON-LABEL: test_vmul_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
-// CONSTRAINED: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %a, <1 x double> %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[MUL_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vmul_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[MUL_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vmul_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[MUL_I]]
+//
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
return vmul_f64(a, b);
}
-// COMMON-LABEL: test_vdiv_f64
-// UNCONSTRAINED: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
-// CONSTRAINED: [[DIV_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double> %a, <1 x double> %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[DIV_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vdiv_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[DIV_I:%.*]] = fdiv <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[DIV_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vdiv_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[DIV_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fdiv.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[DIV_I]]
+//
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
return vdiv_f64(a, b);
}
-// COMMON-LABEL: test_vmla_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
-// CONSTRAINED: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %b, <1 x double> %c, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
-// CONSTRAINED: [[ADD_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> %a, <1 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[ADD_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vmla_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[B]], [[C]]
+// UNCONSTRAINED-NEXT: [[ADD_I:%.*]] = fadd <1 x double> [[A]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[ADD_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vmla_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> [[B]], <1 x double> [[C]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[ADD_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fadd.v1f64(<1 x double> [[A]], <1 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[ADD_I]]
+//
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmla_f64(a, b, c);
}
-// COMMON-LABEL: test_vmls_f64
-// UNCONSTRAINED: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
-// CONSTRAINED: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> %b, <1 x double> %c, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
-// CONSTRAINED: [[SUB_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> %a, <1 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vmls_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[B]], [[C]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <1 x double> [[A]], [[MUL_I]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vmls_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[MUL_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fmul.v1f64(<1 x double> [[B]], <1 x double> [[C]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> [[A]], <1 x double> [[MUL_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[SUB_I]]
+//
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmls_f64(a, b, c);
}
-// COMMON-LABEL: test_vfma_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
-// CONSTRAINED: [[TMP3:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// UNCONSTRAINED-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// UNCONSTRAINED-NEXT: [[__P2_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CONSTRAINED-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// CONSTRAINED-NEXT: [[__P2_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[TMP9]]
+//
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfma_f64(a, b, c);
}
-// COMMON-LABEL: test_vfms_f64
-// COMMONIR: [[SUB_I:%.*]] = fneg <1 x double> %b
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// UNCONSTRAINED: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
-// CONSTRAINED: [[TMP3:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[TMP3]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to i64
+// UNCONSTRAINED-NEXT: [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// UNCONSTRAINED-NEXT: [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[B]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to i64
+// CONSTRAINED-NEXT: [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// CONSTRAINED-NEXT: [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[TMP9]]
+//
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfms_f64(a, b, c);
}
-// COMMON-LABEL: test_vsub_f64
-// UNCONSTRAINED: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
-// CONSTRAINED: [[SUB_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> %a, <1 x double> %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[SUB_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vsub_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[SUB_I:%.*]] = fsub <1 x double> [[A]], [[B]]
+// UNCONSTRAINED-NEXT: ret <1 x double> [[SUB_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vsub_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[SUB_I:%.*]] = call <1 x double> @llvm.experimental.constrained.fsub.v1f64(<1 x double> [[A]], <1 x double> [[B]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[SUB_I]]
+//
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
return vsub_f64(a, b);
}
-// COMMON-LABEL: test_vcvt_s64_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
-// COMMONIR: ret <1 x i64> [[TMP1]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> [[VCVTZ_I]])
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
return vcvt_s64_f64(a);
}
-// COMMON-LABEL: test_vcvt_u64_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
-// COMMONIR: ret <1 x i64> [[TMP1]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> [[VCVTZ_I]])
+// UNCONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> [[VCVTZ_I]]) #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
return vcvt_u64_f64(a);
}
-// COMMON-LABEL: test_vcvt_f64_s64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// UNCONSTRAINED: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
-// CONSTRAINED: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VCVT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vcvt_f64_s64(
+// UNCONSTRAINED-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// UNCONSTRAINED-NEXT: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VCVT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vcvt_f64_s64(
+// CONSTRAINED-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CONSTRAINED-NEXT: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VCVT_I]]
+//
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
return vcvt_f64_s64(a);
}
-// COMMON-LABEL: test_vcvt_f64_u64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// UNCONSTRAINED: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
-// CONSTRAINED: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VCVT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vcvt_f64_u64(
+// UNCONSTRAINED-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// UNCONSTRAINED-NEXT: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VCVT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vcvt_f64_u64(
+// CONSTRAINED-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CONSTRAINED-NEXT: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VCVT_I]]
+//
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
return vcvt_f64_u64(a);
}
-// COMMON-LABEL: test_vrnda_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %a, metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDA1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrnda_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDA1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrnda_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> [[VRNDA_I]], metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDA1_I]]
+//
float64x1_t test_vrnda_f64(float64x1_t a) {
return vrnda_f64(a);
}
-// COMMON-LABEL: test_vrndp_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %a, metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDP1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndp_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDP1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndp_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> [[VRNDP_I]], metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDP1_I]]
+//
float64x1_t test_vrndp_f64(float64x1_t a) {
return vrndp_f64(a);
}
-// COMMON-LABEL: test_vrndm_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %a, metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDM1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndm_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDM1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndm_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> [[VRNDM_I]], metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDM1_I]]
+//
float64x1_t test_vrndm_f64(float64x1_t a) {
return vrndm_f64(a);
}
-// COMMON-LABEL: test_vrndx_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDX1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndx_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDX1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndx_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.rint.v1f64(<1 x double> [[VRNDX_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDX1_I]]
+//
float64x1_t test_vrndx_f64(float64x1_t a) {
return vrndx_f64(a);
}
-// COMMON-LABEL: test_vrnd_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %a, metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDZ1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrnd_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDZ1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrnd_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> [[VRNDZ_I]], metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDZ1_I]]
+//
float64x1_t test_vrnd_f64(float64x1_t a) {
return vrnd_f64(a);
}
-// COMMON-LABEL: test_vrndi_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VRNDI1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndi_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDI_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VRNDI_V1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_V_I]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VRNDI_V1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vrndi_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[VRNDI_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VRNDI_V1_I:%.*]] = call <1 x double> @llvm.experimental.constrained.nearbyint.v1f64(<1 x double> [[VRNDI_V_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VRNDI_V1_I]]
+//
float64x1_t test_vrndi_f64(float64x1_t a) {
return vrndi_f64(a);
}
-// COMMON-LABEL: test_vsqrt_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// UNCONSTRAINED: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
-// CONSTRAINED: [[VSQRT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// COMMONIR: ret <1 x double> [[VSQRT_I]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vsqrt_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP2]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[VSQRT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vsqrt_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.sqrt.v1f64(<1 x double> [[TMP2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR3]]
+// CONSTRAINED-NEXT: ret <1 x double> [[VSQRT_I]]
+//
float64x1_t test_vsqrt_f64(float64x1_t a) {
return vsqrt_f64(a);
}
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 271ae056308d2..791f0a1a29409 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -1,17418 +1,23567 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vadd_s8(
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
return vadd_s8(v1, v2);
}
-// CHECK-LABEL: @test_vadd_s16(
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
return vadd_s16(v1, v2);
}
-// CHECK-LABEL: @test_vadd_s32(
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
return vadd_s32(v1, v2);
}
-// CHECK-LABEL: @test_vadd_s64(
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vadd_s64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
return vadd_s64(v1, v2);
}
-// CHECK-LABEL: @test_vadd_f32(
-// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
-// CHECK: ret <2 x float> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vadd_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
return vadd_f32(v1, v2);
}
-// CHECK-LABEL: @test_vadd_u8(
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
return vadd_u8(v1, v2);
}
-// CHECK-LABEL: @test_vadd_u16(
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
return vadd_u16(v1, v2);
}
-// CHECK-LABEL: @test_vadd_u32(
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
return vadd_u32(v1, v2);
}
-// CHECK-LABEL: @test_vadd_u64(
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vadd_u64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
return vadd_u64(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_s8(
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
return vaddq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_s16(
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
return vaddq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_s32(
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
return vaddq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_s64(
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
return vaddq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_f32(
-// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
-// CHECK: ret <4 x float> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vaddq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
return vaddq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_f64(
-// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
-// CHECK: ret <2 x double> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vaddq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x double> [[ADD_I]]
+//
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
return vaddq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_u8(
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
return vaddq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_u16(
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
return vaddq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_u32(
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
return vaddq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vaddq_u64(
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
return vaddq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vsub_s8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
return vsub_s8(v1, v2);
}
-// CHECK-LABEL: @test_vsub_s16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
return vsub_s16(v1, v2);
}
-// CHECK-LABEL: @test_vsub_s32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
return vsub_s32(v1, v2);
}
-// CHECK-LABEL: @test_vsub_s64(
-// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
-// CHECK: ret <1 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsub_s64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <1 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <1 x i64> [[SUB_I]]
+//
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
return vsub_s64(v1, v2);
}
-// CHECK-LABEL: @test_vsub_f32(
-// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vsub_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
return vsub_f32(v1, v2);
}
-// CHECK-LABEL: @test_vsub_u8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
return vsub_u8(v1, v2);
}
-// CHECK-LABEL: @test_vsub_u16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
return vsub_u16(v1, v2);
}
-// CHECK-LABEL: @test_vsub_u32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
return vsub_u32(v1, v2);
}
-// CHECK-LABEL: @test_vsub_u64(
-// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
-// CHECK: ret <1 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsub_u64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <1 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <1 x i64> [[SUB_I]]
+//
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
return vsub_u64(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_s8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
return vsubq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_s16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
return vsubq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_s32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
return vsubq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_s64(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
return vsubq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_f32(
-// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vsubq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
return vsubq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_f64(
-// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
-// CHECK: ret <2 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vsubq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x double> [[SUB_I]]
+//
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
return vsubq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_u8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
return vsubq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_u16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
return vsubq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_u32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
return vsubq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vsubq_u64(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
return vsubq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vmul_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[MUL_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmul_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[MUL_I]]
+//
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
return vmul_s8(v1, v2);
}
-// CHECK-LABEL: @test_vmul_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmul_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
return vmul_s16(v1, v2);
}
-// CHECK-LABEL: @test_vmul_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmul_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
return vmul_s32(v1, v2);
}
-// CHECK-LABEL: @test_vmul_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
-// CHECK: ret <2 x float> [[MUL_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmul_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x float> [[MUL_I]]
+//
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
return vmul_f32(v1, v2);
}
-// CHECK-LABEL: @test_vmul_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
-// CHECK: ret <8 x i8> [[MUL_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmul_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i8> [[MUL_I]]
+//
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
return vmul_u8(v1, v2);
}
-// CHECK-LABEL: @test_vmul_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmul_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
return vmul_u16(v1, v2);
}
-// CHECK-LABEL: @test_vmul_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmul_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
return vmul_u32(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[MUL_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmulq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[MUL_I]]
+//
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
return vmulq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmulq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
return vmulq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmulq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
return vmulq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
-// CHECK: ret <16 x i8> [[MUL_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmulq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: ret <16 x i8> [[MUL_I]]
+//
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
return vmulq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmulq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
return vmulq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmulq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
return vmulq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
-// CHECK: ret <4 x float> [[MUL_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmulq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x float> [[MUL_I]]
+//
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
return vmulq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
-// CHECK: ret <2 x double> [[MUL_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmulq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x double> [[MUL_I]]
+//
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
return vmulq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vmul_p8(
-// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VMUL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmul_p8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VMUL_V_I]]
+//
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
return vmul_p8(v1, v2);
}
-// CHECK-LABEL: @test_vmulq_p8(
-// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VMULQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmulq_p8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VMULQ_V_I]]
+//
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
return vmulq_p8(v1, v2);
}
-// CHECK-LABEL: @test_vmla_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmla_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vmla_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmla_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return (int8x8_t)vmla_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmla_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vmla_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
-// CHECK: ret <2 x float> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmla_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmla_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmla_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vmla_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmla_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vmla_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmla_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmla_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vmla_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmlaq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vmlaq_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlaq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vmlaq_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vmlaq_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
-// CHECK: ret <4 x float> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmlaq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlaq_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmlaq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vmlaq_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlaq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vmlaq_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlaq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vmlaq_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlaq_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
-// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
-// CHECK: ret <2 x double> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmlaq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V2]], [[V3]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x double> [[ADD_I]]
+//
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlaq_f64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmls_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vmls_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmls_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return (int8x8_t)vmls_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmls_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vmls_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmls_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vmls_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmls_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vmls_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmls_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vmls_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmls_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmls_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vmls_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmlsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vmlsq_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vmlsq_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vmlsq_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmlsq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vmlsq_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmlsq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vmlsq_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vmlsq_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vmlsq_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vmlsq_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
-// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
-// CHECK: ret <2 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmlsq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[V2]], [[V3]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[V1]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x double> [[SUB_I]]
+//
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vmlsq_f64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfma_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
-// CHECK: ret <2 x float> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vfma_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfma_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfmaq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
-// CHECK: ret <4 x float> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vfmaq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmaq_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfmaq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
-// CHECK: ret <2 x double> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vfmaq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
+//
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmaq_f64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfms_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %v2
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
-// CHECK: ret <2 x float> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vfms_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vfms_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfmsq_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %v2
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
-// CHECK: ret <4 x float> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vfmsq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vfmsq_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vfmsq_f64(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x double> %v2
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
-// CHECK: ret <2 x double> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vfmsq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x double> [[TMP6]])
+// CHECK-NEXT: ret <2 x double> [[TMP9]]
+//
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vfmsq_f64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vdivq_f64(
-// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
-// CHECK: ret <2 x double> [[DIV_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vdivq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x double> [[DIV_I]]
+//
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
return vdivq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vdivq_f32(
-// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
-// CHECK: ret <4 x float> [[DIV_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vdivq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <4 x float> [[DIV_I]]
+//
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
return vdivq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vdiv_f32(
-// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
-// CHECK: ret <2 x float> [[DIV_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vdiv_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: ret <2 x float> [[DIV_I]]
+//
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
-// CHECK-LABEL: @test_vaba_s8(
-// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vaba_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vaba_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
return vaba_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vaba_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vaba_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vaba_u8(
-// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vaba_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vaba_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vaba_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vaba_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vaba_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_s8(
-// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vabaq_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabaq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vabaq_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vabaq_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_u8(
-// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vabaq_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabaq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vabaq_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabaq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabaq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[V1]], [[VABD2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
return vabaq_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vabd_s8(
-// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VABD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VABD_I]]
+//
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
return vabd_s8(v1, v2);
}
-// CHECK-LABEL: @test_vabd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: ret <4 x i16> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VABD2_I]]
+//
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
return vabd_s16(v1, v2);
}
-// CHECK-LABEL: @test_vabd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: ret <2 x i32> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VABD2_I]]
+//
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
return vabd_s32(v1, v2);
}
-// CHECK-LABEL: @test_vabd_u8(
-// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VABD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VABD_I]]
+//
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
return vabd_u8(v1, v2);
}
-// CHECK-LABEL: @test_vabd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: ret <4 x i16> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VABD2_I]]
+//
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
return vabd_u16(v1, v2);
}
-// CHECK-LABEL: @test_vabd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: ret <2 x i32> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VABD2_I]]
+//
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
return vabd_u32(v1, v2);
}
-// CHECK-LABEL: @test_vabd_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
-// CHECK: ret <2 x float> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vabd_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]])
+// CHECK-NEXT: ret <2 x float> [[VABD2_I]]
+//
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
return vabd_f32(v1, v2);
}
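A minimal sketch (illustrative only, not part of the patch) of what the new
float checks encode: the generated code now round-trips float lanes through a
same-width integer vector before the generic <8 x i8> cast, and both bitcasts
are value-preserving, so the fabd intrinsic still sees the original lanes. In
source terms, assuming the standard arm_neon.h reinterpret intrinsics:

  #include <arm_neon.h>
  // No-op at the value level; mirrors the <2 x float> -> <2 x i32>
  // -> <8 x i8> -> <2 x float> chain in the checks above.
  static inline float32x2_t roundtrip_f32(float32x2_t v) {
    uint32x2_t bits = vreinterpret_u32_f32(v); // reinterpret, no conversion
    return vreinterpret_f32_u32(bits);         // bits unchanged
  }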
-// CHECK-LABEL: @test_vabdq_s8(
-// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VABD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VABD_I]]
+//
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
return vabdq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: ret <8 x i16> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VABD2_I]]
+//
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
return vabdq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: ret <4 x i32> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VABD2_I]]
+//
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
return vabdq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_u8(
-// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VABD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VABD_I]]
+//
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
return vabdq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: ret <8 x i16> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VABD2_I]]
+//
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
return vabdq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: ret <4 x i32> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VABD2_I]]
+//
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
return vabdq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
-// CHECK: ret <4 x float> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vabdq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]])
+// CHECK-NEXT: ret <4 x float> [[VABD2_I]]
+//
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
return vabdq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vabdq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
-// CHECK: ret <2 x double> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vabdq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]])
+// CHECK-NEXT: ret <2 x double> [[VABD2_I]]
+//
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
return vabdq_f64(v1, v2);
}
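The same pattern, sketched for the 128-bit double case (again an illustration,
not part of the patch): the element-width bitcast pairs cancel, so vabdq_f64
still reduces to a single fabd call.

  #include <arm_neon.h>
  static inline float64x2_t roundtrip_f64(float64x2_t v) {
    uint64x2_t bits = vreinterpretq_u64_f64(v); // <2 x double> -> <2 x i64>
    return vreinterpretq_f64_u64(bits);         // <2 x i64> -> <2 x double>
  }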
-// CHECK-LABEL: @test_vbsl_s8(
-// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <8 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <8 x i8> [[VBSL2_I]]
+//
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
return vbsl_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP4]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP4]]
+//
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
return (int8x8_t)vbsl_s16(v1, v2, v3);
}
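For reference (illustrative only, not from the patch), the and/xor/or sequence
the VBSL checks describe is the classic lane-wise bit-select,
result = (mask & a) | (~mask & b); an equivalent source-level formulation
using vbic (which computes a & ~b):

  #include <arm_neon.h>
  static inline uint16x4_t bsl_model(uint16x4_t mask, uint16x4_t a,
                                     uint16x4_t b) {
    // Select bits from a where mask is 1, from b where mask is 0.
    return vorr_u16(vand_u16(mask, a), vbic_u16(b, mask));
  }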
-// CHECK-LABEL: @test_vbsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i32> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vbsl_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <2 x i32> [[VBSL5_I]]
+//
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
return vbsl_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <1 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vbsl_s64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]], <1 x i64> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <1 x i64> [[VBSL5_I]]
+//
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
return vbsl_s64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_u8(
-// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <8 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <8 x i8> [[VBSL2_I]]
+//
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
return vbsl_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <4 x i16> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vbsl_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <4 x i16> [[VBSL5_I]]
+//
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
return vbsl_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i32> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vbsl_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <2 x i32> [[VBSL5_I]]
+//
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
return vbsl_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <1 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vbsl_u64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x i64> noundef [[V2:%.*]], <1 x i64> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <1 x i64> [[VBSL5_I]]
+//
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
return vbsl_u64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
-// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
-// CHECK: ret <2 x float> [[TMP5]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vbsl_f32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP5]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP6]]
+//
float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
return vbsl_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
-// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP4]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vbsl_f64(
+// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]], <1 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V2]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V3]] to i64
+// CHECK-NEXT: [[__P2_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <1 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP5]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP6]]
+//
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
return vbsl_f64(v1, v2, v3);
}
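A note on the <1 x double> case above: the checks show each double operand
bitcast to an i64 and reinserted into a <1 x i64> (which looks like SROA
residue of the intrinsic wrapper's local copies) before the usual cast chain.
The rebuild is value-preserving; a hedged source-level equivalent, assuming
the standard reinterpret intrinsics:

  #include <arm_neon.h>
  static inline float64x1_t rebuild_f64(float64x1_t v) {
    uint64x1_t bits = vreinterpret_u64_f64(v); // reuse the 64-bit pattern
    return vreinterpret_f64_u64(bits);         // no value change
  }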
-// CHECK-LABEL: @test_vbsl_p8(
-// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <8 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_p8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <8 x i8> [[VBSL2_I]]
+//
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
return vbsl_p8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbsl_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <4 x i16> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vbsl_p16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <4 x i16> [[VBSL5_I]]
+//
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
return vbsl_p16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_s8(
-// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <16 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]]
+//
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
return vbslq_s8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <8 x i16> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vbslq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <8 x i16> [[VBSL5_I]]
+//
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
return vbslq_s16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <4 x i32> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vbslq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <4 x i32> [[VBSL5_I]]
+//
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
return vbslq_s32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vbslq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]], <2 x i64> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <2 x i64> [[VBSL5_I]]
+//
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
return vbslq_s64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_u8(
-// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <16 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]]
+//
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
return vbslq_u8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <8 x i16> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vbslq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <8 x i16> [[VBSL5_I]]
+//
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
return vbslq_u16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <4 x i32> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vbslq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <4 x i32> [[VBSL5_I]]
+//
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vbslq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]], <2 x i64> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <2 x i64> [[VBSL5_I]]
+//
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
return vbslq_u64(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
-// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
-// CHECK: ret <4 x float> [[TMP4]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vbslq_f32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[VBSL_I]], splat (i32 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP5]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
return vbslq_f32(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_p8(
-// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
-// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
-// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK: ret <16 x i8> [[VBSL2_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_p8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1)
+// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]]
+// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]]
+//
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
return vbslq_p8(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
-// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
-// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <8 x i16> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vbslq_p16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <8 x i16> [[VBSL5_I]]
+//
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
return vbslq_p16(v1, v2, v3);
}
-// CHECK-LABEL: @test_vbslq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
-// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
-// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
-// CHECK: ret <2 x double> [[TMP4]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vbslq_f64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]], <2 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V3]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP5]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP6]]
+//
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
return vbslq_f64(v1, v2, v3);
}
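// Note: vbsl* has no dedicated intrinsic; the IR above is the canonical
// bitwise-select expansion (v1 & v2) | (~v1 & v3) on the integer view of the
// operands, with bitcasts reconciling the element types. Scalar sketch of the
// same selection (illustrative only, not part of the test):
//   uint64_t bsl64(uint64_t m, uint64_t a, uint64_t b) {
//     return (m & a) | (~m & b); // each result bit picked from a or b by m
//   }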
-// CHECK-LABEL: @test_vrecps_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
-// CHECK: ret <2 x float> [[VRECPS_V2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrecps_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]])
+// CHECK-NEXT: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
return vrecps_f32(v1, v2);
}
-// CHECK-LABEL: @test_vrecpsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
-// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrecpsq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]])
+// CHECK-NEXT: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
return vrecpsq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vrecpsq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
-// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x double> [[VRECPSQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrecpsq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]])
+// CHECK-NEXT: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP5]]
+//
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
return vrecpsq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vrsqrts_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
-// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrsqrts_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]])
+// CHECK-NEXT: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
return vrsqrts_f32(v1, v2);
}
-// CHECK-LABEL: @test_vrsqrtsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
-// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrsqrtsq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]])
+// CHECK-NEXT: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
return vrsqrtsq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vrsqrtsq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
-// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrsqrtsq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]])
+// CHECK-NEXT: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP5]]
+//
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
return vrsqrtsq_f64(v1, v2);
}
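// Note: vrecps*/vrsqrts* lower straight to the aarch64.neon.frecps/frsqrts
// intrinsics; the extra bitcasts above only round-trip through the generic
// integer vector types. Per the ACLE, FRECPS returns 2.0 - (a * b) and
// FRSQRTS returns (3.0 - (a * b)) / 2.0, i.e. one Newton-Raphson step.
// Illustrative use (the caller and x are assumptions, not part of the test):
//   float32x2_t e = vrecpe_f32(x);        // initial 1/x estimate
//   e = vmul_f32(e, vrecps_f32(x, e));    // refine: e *= 2 - x*e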
-// CHECK-LABEL: @test_vcage_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
-// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcage_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCAGE_V2_I]]
+//
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
return vcage_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcage_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x i64> [[VCAGE_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcage_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCAGE_V2_I]]
+//
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
return vcage_f64(a, b);
}
-// CHECK-LABEL: @test_vcageq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
-// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcageq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCAGEQ_V2_I]]
+//
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
return vcageq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcageq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
-// CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcageq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCAGEQ_V2_I]]
+//
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
return vcageq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vcagt_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
-// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcagt_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCAGT_V2_I]]
+//
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
return vcagt_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcagt_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x i64> [[VCAGT_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcagt_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCAGT_V2_I]]
+//
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
return vcagt_f64(a, b);
}
-// CHECK-LABEL: @test_vcagtq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
-// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcagtq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCAGTQ_V2_I]]
+//
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
return vcagtq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcagtq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
-// CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcagtq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCAGTQ_V2_I]]
+//
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
return vcagtq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vcale_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
-// CHECK: ret <2 x i32> [[VCALE_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcale_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCALE_V2_I]]
+//
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
return vcale_f32(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
-// CHECK-LABEL: @test_vcale_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
-// CHECK: ret <1 x i64> [[VCALE_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcale_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCALE_V2_I]]
+//
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
return vcale_f64(a, b);
}
-// CHECK-LABEL: @test_vcaleq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
-// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcaleq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCALEQ_V2_I]]
+//
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
return vcaleq_f32(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
-// CHECK-LABEL: @test_vcaleq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
-// CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcaleq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCALEQ_V2_I]]
+//
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
return vcaleq_f64(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
-// CHECK-LABEL: @test_vcalt_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
-// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
-// CHECK: ret <2 x i32> [[VCALT_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcalt_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCALT_V2_I]]
+//
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
return vcalt_f32(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
-// CHECK-LABEL: @test_vcalt_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
-// CHECK: ret <1 x i64> [[VCALT_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcalt_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCALT_V2_I]]
+//
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
return vcalt_f64(a, b);
}
-// CHECK-LABEL: @test_vcaltq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
-// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
-// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcaltq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCALTQ_V2_I]]
+//
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
return vcaltq_f32(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
-// CHECK-LABEL: @test_vcaltq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
-// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
-// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcaltq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCALTQ_V2_I]]
+//
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
return vcaltq_f64(v1, v2);
// Using registers other than v0 and v1 is possible, but would be odd.
}
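// Note: there is no "absolute less-than/equal" intrinsic; vcale/vcalt reuse
// facge/facgt with the operands swapped (|v1| <= |v2| iff |v2| >= |v1|),
// which is why [[VCALE_V_I]]/[[VCALT_V_I]] above are built from v2 while the
// second operands come from v1.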
-// CHECK-LABEL: @test_vtst_s8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtst_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
return vtst_s8(v1, v2);
}
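// Note: vtst* likewise needs no target intrinsic: each lane computes
// (v1 & v2) != 0 and sign-extends the i1 result to all-ones/all-zeros.
// Scalar sketch (illustrative only, not part of the test):
//   uint8_t tst8(uint8_t a, uint8_t b) { return (a & b) ? 0xFF : 0x00; }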
-// CHECK-LABEL: @test_vtst_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtst_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
return vtst_s16(v1, v2);
}
-// CHECK-LABEL: @test_vtst_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VTST_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtst_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VTST_I]]
+//
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
return vtst_s32(v1, v2);
}
-// CHECK-LABEL: @test_vtst_u8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtst_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
return vtst_u8(v1, v2);
}
-// CHECK-LABEL: @test_vtst_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtst_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
return vtst_u16(v1, v2);
}
-// CHECK-LABEL: @test_vtst_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VTST_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtst_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VTST_I]]
+//
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
return vtst_u32(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_s8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtstq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
return vtstq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtstq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
return vtstq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VTST_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtstq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VTST_I]]
+//
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
return vtstq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_u8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtstq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
return vtstq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtstq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
return vtstq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VTST_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtstq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VTST_I]]
+//
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
return vtstq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtstq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VTST_I]]
+//
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
return vtstq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtstq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VTST_I]]
+//
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
return vtstq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vtst_p8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtst_p8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
return vtst_p8(v1, v2);
}
-// CHECK-LABEL: @test_vtst_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtst_p16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
return vtst_p16(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_p8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtstq_p8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
return vtstq_p8(v1, v2);
}
-// CHECK-LABEL: @test_vtstq_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtstq_p16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
return vtstq_p16(v1, v2);
}
-// CHECK-LABEL: @test_vtst_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vtst_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VTST_I]]
+//
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
return vtst_s64(a, b);
}
-// CHECK-LABEL: @test_vtst_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vtst_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VTST_I]]
+//
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
return vtst_u64(a, b);
}
-// CHECK-LABEL: @test_vceq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceq_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
return vceq_s8(v1, v2);
}
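// Note: the vceq* family lowers to plain icmp eq / fcmp oeq plus a
// sign-extend, so equal lanes become all-ones and unequal lanes all-zeros.
// Scalar sketch (illustrative only, not part of the test):
//   uint8_t ceq8(int8_t a, int8_t b) { return a == b ? 0xFF : 0x00; }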
-// CHECK-LABEL: @test_vceq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vceq_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
return vceq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vceq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceq_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
return vceq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vceq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceq_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
return vceq_s64(a, b);
}
-// CHECK-LABEL: @test_vceq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceq_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
return vceq_u64(a, b);
}
-// CHECK-LABEL: @test_vceq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceq_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
return vceq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vceq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceq_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <1 x double> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
return vceq_f64(a, b);
}
-// CHECK-LABEL: @test_vceq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceq_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
return vceq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vceq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vceq_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
return vceq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vceq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceq_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
return vceq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vceq_p8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceq_p8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
return vceq_p8(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
return vceqq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vceqq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
return vceqq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
return vceqq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
return vceqq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
return vceqq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vceqq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
return vceqq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
return vceqq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_p8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqq_p8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
return vceqq_p8(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
return vceqq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
return vceqq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vceqq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
return vceqq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vcge_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcge_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
return vcge_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcge_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcge_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
return vcge_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcge_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcge_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
return vcge_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcge_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcge_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
return vcge_s64(a, b);
}
-// CHECK-LABEL: @test_vcge_u64(
-// CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcge_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
return vcge_u64(a, b);
}
-// CHECK-LABEL: @test_vcge_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcge_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
return vcge_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcge_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcge_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <1 x double> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
return vcge_f64(a, b);
}
-// CHECK-LABEL: @test_vcge_u8(
-// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcge_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
return vcge_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcge_u16(
-// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcge_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
return vcge_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcge_u32(
-// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcge_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
return vcge_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgeq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
return vcgeq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgeq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
return vcgeq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgeq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
return vcgeq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgeq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
return vcgeq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgeq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
return vcgeq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgeq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
return vcgeq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgeq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
return vcgeq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgeq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
return vcgeq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgeq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
return vcgeq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vcgeq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgeq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
return vcgeq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vcle_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// LE condition predicate implemented as GE, so check reversed operands.
// Using registers other than v0 and v1 is possible, but would be odd.
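// For illustration only (a sketch of the expected instruction selection, not
// checked by this test): "a <= b" is typically lowered to CMGE with the
// operands swapped, e.g. for vcle_s8 with arguments arriving in v0 and v1:
//   cmge v0.8b, v1.8b, v0.8b   // v0.8b = sext(v1 >= v0) == sext(arg1 <= arg2)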
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcle_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
return vcle_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcle_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcle_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
return vcle_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcle_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcle_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
return vcle_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcle_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcle_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
return vcle_s64(a, b);
}
-// CHECK-LABEL: @test_vcle_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcle_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
return vcle_u64(a, b);
}
-// CHECK-LABEL: @test_vcle_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcle_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
return vcle_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcle_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcle_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <1 x double> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
return vcle_f64(a, b);
}
-// CHECK-LABEL: @test_vcle_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcle_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
return vcle_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcle_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcle_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
return vcle_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcle_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcle_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
return vcle_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcleq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
return vcleq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcleq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
return vcleq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcleq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
return vcleq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcleq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
return vcleq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcleq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
return vcleq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcleq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
return vcleq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcleq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
return vcleq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcleq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
return vcleq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcleq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
return vcleq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vcleq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcleq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
return vcleq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcgt_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
return vcgt_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcgt_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
return vcgt_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgt_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
return vcgt_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgt_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
return vcgt_s64(a, b);
}
-// CHECK-LABEL: @test_vcgt_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgt_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
return vcgt_u64(a, b);
}
-// CHECK-LABEL: @test_vcgt_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgt_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
return vcgt_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgt_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <1 x double> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
return vcgt_f64(a, b);
}
-// CHECK-LABEL: @test_vcgt_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcgt_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
return vcgt_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcgt_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
return vcgt_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcgt_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgt_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
return vcgt_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgtq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
return vcgtq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgtq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
return vcgtq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgtq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
return vcgtq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgtq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
return vcgtq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgtq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
return vcgtq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgtq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
return vcgtq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgtq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
return vcgtq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgtq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
return vcgtq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgtq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
return vcgtq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vcgtq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgtq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
return vcgtq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vclt_s8(
-// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// LT condition predicate implemented as GT, so check reversed operands.
// Using registers other than v0 and v1 is possible, but would be odd.
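// For illustration only (a sketch, as with vcle above): "a < b" is typically
// lowered to CMGT with the operands swapped, e.g.
//   cmgt v0.8b, v1.8b, v0.8b   // v0.8b = sext(v1 > v0) == sext(arg1 < arg2)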
+// CHECK-LABEL: define dso_local <8 x i8> @test_vclt_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
return vclt_s8(v1, v2);
}
-// CHECK-LABEL: @test_vclt_s16(
-// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vclt_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
return vclt_s16(v1, v2);
}
-// CHECK-LABEL: @test_vclt_s32(
-// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclt_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
return vclt_s32(v1, v2);
}
-// CHECK-LABEL: @test_vclt_s64(
-// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vclt_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
return vclt_s64(a, b);
}
-// CHECK-LABEL: @test_vclt_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vclt_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
return vclt_u64(a, b);
}
-// CHECK-LABEL: @test_vclt_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclt_f32(
+// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
return vclt_f32(v1, v2);
}
-// CHECK-LABEL: @test_vclt_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vclt_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <1 x double> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
return vclt_f64(a, b);
}
-// CHECK-LABEL: @test_vclt_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vclt_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
return vclt_u8(v1, v2);
}
-// CHECK-LABEL: @test_vclt_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vclt_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
return vclt_u16(v1, v2);
}
-// CHECK-LABEL: @test_vclt_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclt_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <2 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
return vclt_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcltq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
return vcltq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcltq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
return vcltq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcltq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
return vcltq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcltq_f32(
+// CHECK-SAME: <4 x float> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
return vcltq_f32(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcltq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
return vcltq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcltq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
return vcltq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcltq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
return vcltq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_s64(
-// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcltq_s64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
return vcltq_s64(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_u64(
-// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcltq_u64(
+// CHECK-SAME: <2 x i64> noundef [[V1:%.*]], <2 x i64> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <2 x i64> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
return vcltq_u64(v1, v2);
}
-// CHECK-LABEL: @test_vcltq_f64(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcltq_f64(
+// CHECK-SAME: <2 x double> noundef [[V1:%.*]], <2 x double> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[V1]], [[V2]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
return vcltq_f64(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_s8(
-// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VHADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vhadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]]
+//
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
return vhadd_s8(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vhadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
return vhadd_s16(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vhadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
return vhadd_s32(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_u8(
-// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VHADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vhadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]]
+//
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
return vhadd_u8(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vhadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
return vhadd_u16(v1, v2);
}
-// CHECK-LABEL: @test_vhadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vhadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
return vhadd_u32(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_s8(
-// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vhaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]]
+//
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
return vhaddq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vhaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
return vhaddq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vhaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
return vhaddq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_u8(
-// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vhaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]]
+//
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
return vhaddq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vhaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
return vhaddq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vhaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vhaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
return vhaddq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_s8(
-// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VHSUB_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vhsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VHSUB_V_I]]
+//
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
return vhsub_s8(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vhsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
return vhsub_s16(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vhsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
return vhsub_s32(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_u8(
-// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VHSUB_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vhsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VHSUB_V_I]]
+//
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
return vhsub_u8(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vhsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
return vhsub_u16(v1, v2);
}
-// CHECK-LABEL: @test_vhsub_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vhsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
return vhsub_u32(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_s8(
-// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vhsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VHSUBQ_V_I]]
+//
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
return vhsubq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vhsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
return vhsubq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vhsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
return vhsubq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_u8(
-// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vhsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VHSUBQ_V_I]]
+//
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
return vhsubq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vhsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
return vhsubq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vhsubq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vhsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
return vhsubq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_s8(
-// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VRHADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrhadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]]
+//
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
return vrhadd_s8(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrhadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
return vrhadd_s16(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrhadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
return vrhadd_s32(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_u8(
-// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
-// CHECK: ret <8 x i8> [[VRHADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrhadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]]
+//
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
return vrhadd_u8(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrhadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
return vrhadd_u16(v1, v2);
}
-// CHECK-LABEL: @test_vrhadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrhadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
return vrhadd_u32(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_s8(
-// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrhaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]]
+//
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
return vrhaddq_s8(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrhaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
return vrhaddq_s16(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrhaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
return vrhaddq_s32(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_u8(
-// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
-// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrhaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]]
+//
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
return vrhaddq_u8(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrhaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
return vrhaddq_u16(v1, v2);
}
-// CHECK-LABEL: @test_vrhaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrhaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
return vrhaddq_u32(v1, v2);
}
-// CHECK-LABEL: @test_vqadd_s8(
-// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQADD_V_I]]
+//
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
return vqadd_s8(a, b);
}
-// CHECK-LABEL: @test_vqadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
return vqadd_s16(a, b);
}
-// CHECK-LABEL: @test_vqadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
return vqadd_s32(a, b);
}
-// CHECK-LABEL: @test_vqadd_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqadd_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
return vqadd_s64(a, b);
}
-// CHECK-LABEL: @test_vqadd_u8(
-// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQADD_V_I]]
+//
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
return vqadd_u8(a, b);
}
-// CHECK-LABEL: @test_vqadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
return vqadd_u16(a, b);
}
-// CHECK-LABEL: @test_vqadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
return vqadd_u32(a, b);
}
-// CHECK-LABEL: @test_vqadd_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQADD_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqadd_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
return vqadd_u64(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s8(
-// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQADDQ_V_I]]
+//
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
return vqaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
return vqaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
return vqaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
return vqaddq_s64(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u8(
-// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQADDQ_V_I]]
+//
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
return vqaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
return vqaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
return vqaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
return vqaddq_u64(a, b);
}
-// CHECK-LABEL: @test_vqsub_s8(
-// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSUB_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSUB_V_I]]
+//
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
return vqsub_s8(a, b);
}
-// CHECK-LABEL: @test_vqsub_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
return vqsub_s16(a, b);
}
-// CHECK-LABEL: @test_vqsub_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
return vqsub_s32(a, b);
}
-// CHECK-LABEL: @test_vqsub_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqsub_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
return vqsub_s64(a, b);
}
-// CHECK-LABEL: @test_vqsub_u8(
-// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSUB_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSUB_V_I]]
+//
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
return vqsub_u8(a, b);
}
-// CHECK-LABEL: @test_vqsub_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
return vqsub_u16(a, b);
}
-// CHECK-LABEL: @test_vqsub_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
return vqsub_u32(a, b);
}
-// CHECK-LABEL: @test_vqsub_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqsub_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
return vqsub_u64(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s8(
-// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSUBQ_V_I]]
+//
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
return vqsubq_s8(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
return vqsubq_s16(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
return vqsubq_s32(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqsubq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
return vqsubq_s64(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u8(
-// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSUBQ_V_I]]
+//
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
return vqsubq_u8(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
return vqsubq_u16(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
return vqsubq_u32(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqsubq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
return vqsubq_u64(a, b);
}
-// CHECK-LABEL: @test_vshl_s8(
-// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VSHL_V_I]]
+//
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
return vshl_s8(a, b);
}
-// CHECK-LABEL: @test_vshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
return vshl_s16(a, b);
}
-// CHECK-LABEL: @test_vshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
return vshl_s32(a, b);
}
-// CHECK-LABEL: @test_vshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
return vshl_s64(a, b);
}
-// CHECK-LABEL: @test_vshl_u8(
-// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VSHL_V_I]]
+//
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
return vshl_u8(a, b);
}
-// CHECK-LABEL: @test_vshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
return vshl_u16(a, b);
}
-// CHECK-LABEL: @test_vshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
return vshl_u32(a, b);
}
-// CHECK-LABEL: @test_vshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
return vshl_u64(a, b);
}
-// CHECK-LABEL: @test_vshlq_s8(
-// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VSHLQ_V_I]]
+//
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
return vshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
return vshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
return vshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
return vshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vshlq_u8(
-// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VSHLQ_V_I]]
+//
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
return vshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
return vshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
return vshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
return vshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vqshl_s8(
-// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_V_I]]
+//
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
return vqshl_s8(a, b);
}
-// CHECK-LABEL: @test_vqshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
return vqshl_s16(a, b);
}
-// CHECK-LABEL: @test_vqshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
return vqshl_s32(a, b);
}
-// CHECK-LABEL: @test_vqshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
return vqshl_s64(a, b);
}
-// CHECK-LABEL: @test_vqshl_u8(
-// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_V_I]]
+//
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
return vqshl_u8(a, b);
}
-// CHECK-LABEL: @test_vqshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
return vqshl_u16(a, b);
}
-// CHECK-LABEL: @test_vqshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
return vqshl_u32(a, b);
}
-// CHECK-LABEL: @test_vqshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
return vqshl_u64(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s8(
-// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSHLQ_V_I]]
+//
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
return vqshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
return vqshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
return vqshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
return vqshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u8(
-// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSHLQ_V_I]]
+//
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
return vqshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
return vqshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
return vqshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
return vqshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vrshl_s8(
-// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRSHL_V_I]]
+//
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
return vrshl_s8(a, b);
}
-// CHECK-LABEL: @test_vrshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
return vrshl_s16(a, b);
}
-// CHECK-LABEL: @test_vrshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
return vrshl_s32(a, b);
}
-// CHECK-LABEL: @test_vrshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
return vrshl_s64(a, b);
}
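Note the new return path for the <1 x i64> variants: the result is bitcast to a scalar i64 and rebuilt with insertelement (the REF_TMP_I_SROA names) instead of being returned straight from the intrinsic call. The vrshl semantics are unchanged; a minimal sketch of what it computes (illustration only, assuming an AArch64 toolchain):

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      /* A negative shift count makes vrshl a *rounding* right shift:
         (5 + 1) >> 1 == 3, where the non-rounding vshl_s64 would give 2. */
      int64x1_t a = vdup_n_s64(5);
      int64x1_t b = vdup_n_s64(-1);
      int64x1_t r = vrshl_s64(a, b);
      printf("%lld\n", (long long)vget_lane_s64(r, 0)); /* prints 3 */
      return 0;
    }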
-// CHECK-LABEL: @test_vrshl_u8(
-// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRSHL_V_I]]
+//
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
return vrshl_u8(a, b);
}
-// CHECK-LABEL: @test_vrshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
return vrshl_u16(a, b);
}
-// CHECK-LABEL: @test_vrshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
return vrshl_u32(a, b);
}
-// CHECK-LABEL: @test_vrshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
return vrshl_u64(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s8(
-// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRSHLQ_V_I]]
+//
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
return vrshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
return vrshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
return vrshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
return vrshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u8(
-// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRSHLQ_V_I]]
+//
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
return vrshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
return vrshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
return vrshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
return vrshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s8(
-// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQRSHL_V_I]]
+//
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
return vqrshl_s8(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
return vqrshl_s16(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
return vqrshl_s32(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqrshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
return vqrshl_s64(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u8(
-// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQRSHL_V_I]]
+//
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
return vqrshl_u8(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
return vqrshl_u16(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
return vqrshl_u32(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqrshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
return vqrshl_u64(a, b);
}
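vqrshl combines both behaviours checked above, rounding and saturation. A minimal sketch of the unsigned saturating case (illustration only; assumes an AArch64 toolchain):

    #include <arm_neon.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* uqrshl saturates on overflow: UINT64_MAX << 1 cannot be
         represented, so the lane sticks at UINT64_MAX. */
      uint64x1_t a = vdup_n_u64(UINT64_MAX);
      int64x1_t  b = vdup_n_s64(1);
      uint64x1_t r = vqrshl_u64(a, b);
      printf("%llu\n", (unsigned long long)vget_lane_u64(r, 0));
      return 0;
    }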
-// CHECK-LABEL: @test_vqrshlq_s8(
-// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQRSHLQ_V_I]]
+//
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
return vqrshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
return vqrshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
return vqrshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqrshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
return vqrshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u8(
-// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQRSHLQ_V_I]]
+//
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
return vqrshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
return vqrshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
return vqrshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqrshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
return vqrshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vsli_n_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsli_n_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
return vsli_n_p64(a, b, 0);
}
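The vsli tests above use an immediate of 0, in which case the insert covers the whole lane and the result is simply b. For a nonzero immediate, the low n bits of the first operand survive; a minimal sketch on the unsigned variant (illustration only, not part of this patch):

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      /* vsli_n_u64(a, b, n) == (b << n) | (a & ((1u << n) - 1)):
         here (1 << 3) | (0xFF & 7) == 0xF. */
      uint64x1_t a = vdup_n_u64(0xFF);
      uint64x1_t b = vdup_n_u64(0x1);
      uint64x1_t r = vsli_n_u64(a, b, 3);
      printf("%#llx\n", (unsigned long long)vget_lane_u64(r, 0)); /* 0xf */
      return 0;
    }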
-// CHECK-LABEL: @test_vsliq_n_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsliq_n_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
return vsliq_n_p64(a, b, 0);
}
-// CHECK-LABEL: @test_vmax_s8(
-// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMAX_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmax_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMAX_I]]
+//
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
return vmax_s8(a, b);
}
-// CHECK-LABEL: @test_vmax_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmax_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VMAX2_I]]
+//
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
return vmax_s16(a, b);
}
-// CHECK-LABEL: @test_vmax_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmax_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VMAX2_I]]
+//
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
return vmax_s32(a, b);
}
-// CHECK-LABEL: @test_vmax_u8(
-// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMAX_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmax_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMAX_I]]
+//
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
return vmax_u8(a, b);
}
-// CHECK-LABEL: @test_vmax_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmax_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VMAX2_I]]
+//
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
return vmax_u16(a, b);
}
-// CHECK-LABEL: @test_vmax_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmax_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VMAX2_I]]
+//
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
return vmax_u32(a, b);
}
-// CHECK-LABEL: @test_vmax_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmax_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]])
+// CHECK-NEXT: ret <2 x float> [[VMAX2_I]]
+//
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
return vmax_f32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_s8(
-// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMAX_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmaxq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMAX_I]]
+//
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
return vmaxq_s8(a, b);
}
-// CHECK-LABEL: @test_vmaxq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmaxq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMAX2_I]]
+//
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
return vmaxq_s16(a, b);
}
-// CHECK-LABEL: @test_vmaxq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmaxq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMAX2_I]]
+//
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
return vmaxq_s32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u8(
-// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMAX_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmaxq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMAX_I]]
+//
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
return vmaxq_u8(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmaxq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMAX2_I]]
+//
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
return vmaxq_u16(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmaxq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMAX2_I]]
+//
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
return vmaxq_u32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmaxq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]])
+// CHECK-NEXT: ret <4 x float> [[VMAX2_I]]
+//
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
return vmaxq_f32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmaxq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]])
+// CHECK-NEXT: ret <2 x double> [[VMAX2_I]]
+//
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
return vmaxq_f64(a, b);
}
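For the floating-point vmax tests, the operands now round-trip through integer vectors (<2 x float> -> <2 x i32> -> <8 x i8> and back) before reaching the fmax intrinsic, but the FMAX semantics themselves are untouched, including NaN propagation. A minimal sketch of that behaviour (illustration only; assumes an AArch64 toolchain):

    #include <arm_neon.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      /* FMAX (vmax_f32) propagates NaN: a NaN in either input lane yields
         NaN in that result lane; vmaxnm_f32 gives IEEE maxNum instead. */
      float32x2_t a = {1.0f, NAN};
      float32x2_t b = {2.0f, 0.0f};
      float32x2_t r = vmax_f32(a, b);
      printf("%f %f\n", vget_lane_f32(r, 0), vget_lane_f32(r, 1));
      return 0;
    }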
-// CHECK-LABEL: @test_vmin_s8(
-// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMIN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmin_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMIN_I]]
+//
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
return vmin_s8(a, b);
}
-// CHECK-LABEL: @test_vmin_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmin_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VMIN2_I]]
+//
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
return vmin_s16(a, b);
}
-// CHECK-LABEL: @test_vmin_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmin_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VMIN2_I]]
+//
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
return vmin_s32(a, b);
}
-// CHECK-LABEL: @test_vmin_u8(
-// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMIN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmin_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMIN_I]]
+//
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
return vmin_u8(a, b);
}
-// CHECK-LABEL: @test_vmin_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmin_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VMIN2_I]]
+//
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
return vmin_u16(a, b);
}
-// CHECK-LABEL: @test_vmin_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmin_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VMIN2_I]]
+//
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
return vmin_u32(a, b);
}
-// CHECK-LABEL: @test_vmin_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmin_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]])
+// CHECK-NEXT: ret <2 x float> [[VMIN2_I]]
+//
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
return vmin_f32(a, b);
}
-// CHECK-LABEL: @test_vminq_s8(
-// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMIN_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vminq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMIN_I]]
+//
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
return vminq_s8(a, b);
}
-// CHECK-LABEL: @test_vminq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vminq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMIN2_I]]
+//
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
return vminq_s16(a, b);
}
-// CHECK-LABEL: @test_vminq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vminq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMIN2_I]]
+//
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
return vminq_s32(a, b);
}
-// CHECK-LABEL: @test_vminq_u8(
-// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMIN_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vminq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMIN_I]]
+//
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
return vminq_u8(a, b);
}
-// CHECK-LABEL: @test_vminq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vminq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMIN2_I]]
+//
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
return vminq_u16(a, b);
}
-// CHECK-LABEL: @test_vminq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vminq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMIN2_I]]
+//
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
return vminq_u32(a, b);
}
-// CHECK-LABEL: @test_vminq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vminq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]])
+// CHECK-NEXT: ret <4 x float> [[VMIN2_I]]
+//
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
return vminq_f32(a, b);
}
-// CHECK-LABEL: @test_vminq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vminq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]])
+// CHECK-NEXT: ret <2 x double> [[VMIN2_I]]
+//
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
return vminq_f64(a, b);
}
-// CHECK-LABEL: @test_vmaxnm_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VMAXNM2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmaxnm_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]])
+// CHECK-NEXT: ret <2 x float> [[VMAXNM2_I]]
+//
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
return vmaxnm_f32(a, b);
}
-// CHECK-LABEL: @test_vmaxnmq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VMAXNM2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmaxnmq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]])
+// CHECK-NEXT: ret <4 x float> [[VMAXNM2_I]]
+//
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
return vmaxnmq_f32(a, b);
}
-// CHECK-LABEL: @test_vmaxnmq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VMAXNM2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmaxnmq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]])
+// CHECK-NEXT: ret <2 x double> [[VMAXNM2_I]]
+//
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
return vmaxnmq_f64(a, b);
}
-// CHECK-LABEL: @test_vminnm_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VMINNM2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vminnm_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]])
+// CHECK-NEXT: ret <2 x float> [[VMINNM2_I]]
+//
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
return vminnm_f32(a, b);
}
-// CHECK-LABEL: @test_vminnmq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VMINNM2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vminnmq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]])
+// CHECK-NEXT: ret <4 x float> [[VMINNM2_I]]
+//
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
return vminnmq_f32(a, b);
}
-// CHECK-LABEL: @test_vminnmq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VMINNM2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vminnmq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]])
+// CHECK-NEXT: ret <2 x double> [[VMINNM2_I]]
+//
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
return vminnmq_f64(a, b);
}
-// CHECK-LABEL: @test_vpmax_s8(
-// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMAX_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]]
+//
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
return vpmax_s8(a, b);
}
-// CHECK-LABEL: @test_vpmax_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]]
+//
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
return vpmax_s16(a, b);
}
-// CHECK-LABEL: @test_vpmax_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]]
+//
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
return vpmax_s32(a, b);
}
-// CHECK-LABEL: @test_vpmax_u8(
-// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMAX_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpmax_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMAX_I]]
+//
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
return vpmax_u8(a, b);
}
-// CHECK-LABEL: @test_vpmax_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpmax_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VPMAX2_I]]
+//
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
return vpmax_u16(a, b);
}
-// CHECK-LABEL: @test_vpmax_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpmax_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPMAX2_I]]
+//
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
return vpmax_u32(a, b);
}
-// CHECK-LABEL: @test_vpmax_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vpmax_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]])
+// CHECK-NEXT: ret <2 x float> [[VPMAX2_I]]
+//
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
return vpmax_f32(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_s8(
-// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPMAX_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]]
+//
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
return vpmaxq_s8(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]]
+//
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
return vpmaxq_s16(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]]
+//
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
return vpmaxq_s32(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_u8(
-// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPMAX_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpmaxq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPMAX_I]]
+//
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
return vpmaxq_u8(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpmaxq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VPMAX2_I]]
+//
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
return vpmaxq_u16(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpmaxq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPMAX2_I]]
+//
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
return vpmaxq_u32(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vpmaxq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]])
+// CHECK-NEXT: ret <4 x float> [[VPMAX2_I]]
+//
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
return vpmaxq_f32(a, b);
}
-// CHECK-LABEL: @test_vpmaxq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VPMAX2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vpmaxq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]])
+// CHECK-NEXT: ret <2 x double> [[VPMAX2_I]]
+//
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
return vpmaxq_f64(a, b);
}
-// CHECK-LABEL: @test_vpmin_s8(
-// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMIN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpmin_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMIN_I]]
+//
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
return vpmin_s8(a, b);
}
-// CHECK-LABEL: @test_vpmin_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpmin_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VPMIN2_I]]
+//
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
return vpmin_s16(a, b);
}
-// CHECK-LABEL: @test_vpmin_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpmin_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPMIN2_I]]
+//
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
return vpmin_s32(a, b);
}
-// CHECK-LABEL: @test_vpmin_u8(
-// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMIN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpmin_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMIN_I]]
+//
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
return vpmin_u8(a, b);
}
-// CHECK-LABEL: @test_vpmin_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpmin_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VPMIN2_I]]
+//
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
return vpmin_u16(a, b);
}
-// CHECK-LABEL: @test_vpmin_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpmin_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPMIN2_I]]
+//
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
return vpmin_u32(a, b);
}
-// CHECK-LABEL: @test_vpmin_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vpmin_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]])
+// CHECK-NEXT: ret <2 x float> [[VPMIN2_I]]
+//
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
return vpmin_f32(a, b);
}
-// CHECK-LABEL: @test_vpminq_s8(
-// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPMIN_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpminq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPMIN_I]]
+//
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
return vpminq_s8(a, b);
}
-// CHECK-LABEL: @test_vpminq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpminq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VPMIN2_I]]
+//
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
return vpminq_s16(a, b);
}
-// CHECK-LABEL: @test_vpminq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpminq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPMIN2_I]]
+//
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
return vpminq_s32(a, b);
}
-// CHECK-LABEL: @test_vpminq_u8(
-// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPMIN_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpminq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPMIN_I]]
+//
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
return vpminq_u8(a, b);
}
-// CHECK-LABEL: @test_vpminq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpminq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VPMIN2_I]]
+//
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
return vpminq_u16(a, b);
}
-// CHECK-LABEL: @test_vpminq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpminq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPMIN2_I]]
+//
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
return vpminq_u32(a, b);
}
-// CHECK-LABEL: @test_vpminq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vpminq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]])
+// CHECK-NEXT: ret <4 x float> [[VPMIN2_I]]
+//
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
return vpminq_f32(a, b);
}
-// CHECK-LABEL: @test_vpminq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VPMIN2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vpminq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]])
+// CHECK-NEXT: ret <2 x double> [[VPMIN2_I]]
+//
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
return vpminq_f64(a, b);
}
-// CHECK-LABEL: @test_vpmaxnm_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VPMAXNM2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vpmaxnm_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]])
+// CHECK-NEXT: ret <2 x float> [[VPMAXNM2_I]]
+//
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
return vpmaxnm_f32(a, b);
}
-// CHECK-LABEL: @test_vpmaxnmq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VPMAXNM2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vpmaxnmq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]])
+// CHECK-NEXT: ret <4 x float> [[VPMAXNM2_I]]
+//
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
return vpmaxnmq_f32(a, b);
}
-// CHECK-LABEL: @test_vpmaxnmq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VPMAXNM2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vpmaxnmq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]])
+// CHECK-NEXT: ret <2 x double> [[VPMAXNM2_I]]
+//
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
return vpmaxnmq_f64(a, b);
}
-// CHECK-LABEL: @test_vpminnm_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VPMINNM2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vpminnm_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]])
+// CHECK-NEXT: ret <2 x float> [[VPMINNM2_I]]
+//
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
return vpminnm_f32(a, b);
}
-// CHECK-LABEL: @test_vpminnmq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VPMINNM2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vpminnmq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]])
+// CHECK-NEXT: ret <4 x float> [[VPMINNM2_I]]
+//
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
return vpminnmq_f32(a, b);
}
-// CHECK-LABEL: @test_vpminnmq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VPMINNM2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vpminnmq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]])
+// CHECK-NEXT: ret <2 x double> [[VPMINNM2_I]]
+//
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
return vpminnmq_f64(a, b);
}
-// CHECK-LABEL: @test_vpadd_s8(
-// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPADD_V_I]]
+//
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
return vpadd_s8(a, b);
}
-// CHECK-LABEL: @test_vpadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
return vpadd_s16(a, b);
}
-// CHECK-LABEL: @test_vpadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
return vpadd_s32(a, b);
}
-// CHECK-LABEL: @test_vpadd_u8(
-// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPADD_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vpadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPADD_V_I]]
+//
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
return vpadd_u8(a, b);
}
-// CHECK-LABEL: @test_vpadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPADD_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
return vpadd_u16(a, b);
}
-// CHECK-LABEL: @test_vpadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
return vpadd_u32(a, b);
}
-// CHECK-LABEL: @test_vpadd_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VPADD_V2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vpadd_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
return vpadd_f32(a, b);
}
-// CHECK-LABEL: @test_vpaddq_s8(
-// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPADDQ_V_I]]
+//
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
return vpaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vpaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
return vpaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vpaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
return vpaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vpaddq_u8(
-// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vpaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VPADDQ_V_I]]
+//
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
return vpaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vpaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
return vpaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vpaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
return vpaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vpaddq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vpaddq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
return vpaddq_f32(a, b);
}
-// CHECK-LABEL: @test_vpaddq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x double> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vpaddq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP5]]
+//
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
return vpaddq_f64(a, b);
}
-// CHECK-LABEL: @test_vqdmulh_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqdmulh_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
return vqdmulh_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulh_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqdmulh_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
return vqdmulh_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqdmulhq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqdmulhq_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmulhq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqdmulhq_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulh_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrdmulh_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
return vqrdmulh_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulh_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrdmulh_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
return vqrdmulh_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrdmulhq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqrdmulhq_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrdmulhq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqrdmulhq_s32(a, b);
}
-// CHECK-LABEL: @test_vmulx_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x float> [[VMULX2_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vmulx_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]])
+// CHECK-NEXT: ret <2 x float> [[VMULX2_I]]
+//
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
return vmulx_f32(a, b);
}
-// CHECK-LABEL: @test_vmulxq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x float> [[VMULX2_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vmulxq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]])
+// CHECK-NEXT: ret <4 x float> [[VMULX2_I]]
+//
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
return vmulxq_f32(a, b);
}
-// CHECK-LABEL: @test_vmulxq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
-// CHECK: ret <2 x double> [[VMULX2_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vmulxq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]])
+// CHECK-NEXT: ret <2 x double> [[VMULX2_I]]
+//
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
return vmulxq_f64(a, b);
}
-// CHECK-LABEL: @test_vshl_n_s8(
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3)
-// CHECK: ret <8 x i8> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
+//
int8x8_t test_vshl_n_s8(int8x8_t a) {
return vshl_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshl_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <4 x i16> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
+//
int16x4_t test_vshl_n_s16(int16x4_t a) {
return vshl_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vshl_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <2 x i32> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
+//
int32x2_t test_vshl_n_s32(int32x2_t a) {
return vshl_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_s8(
-// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3)
-// CHECK: ret <16 x i8> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
+//
int8x16_t test_vshlq_n_s8(int8x16_t a) {
return vshlq_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
+//
int16x8_t test_vshlq_n_s16(int16x8_t a) {
return vshlq_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <4 x i32> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
+//
int32x4_t test_vshlq_n_s32(int32x4_t a) {
return vshlq_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK: ret <2 x i64> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
+// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
+//
int64x2_t test_vshlq_n_s64(int64x2_t a) {
return vshlq_n_s64(a, 3);
}
-// CHECK-LABEL: @test_vshl_n_u8(
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3)
-// CHECK: ret <8 x i8> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
+//
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
return vshl_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshl_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <4 x i16> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
+//
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
return vshl_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vshl_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <2 x i32> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
+//
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
return vshl_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_u8(
-// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3)
-// CHECK: ret <16 x i8> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
+//
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
return vshlq_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
+//
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
return vshlq_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <4 x i32> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
+//
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
return vshlq_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vshlq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK: ret <2 x i64> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
+// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
+//
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
return vshlq_n_u64(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_s8(
-// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 3)
-// CHECK: ret <8 x i8> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshr_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <8 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <8 x i8> [[VSHR_N]]
+//
int8x8_t test_vshr_n_s8(int8x8_t a) {
return vshr_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <4 x i16> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshr_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <4 x i16> [[VSHR_N]]
+//
int16x4_t test_vshr_n_s16(int16x4_t a) {
return vshr_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <2 x i32> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshr_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSHR_N]]
+//
int32x2_t test_vshr_n_s32(int32x2_t a) {
return vshr_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_s8(
-// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 3)
-// CHECK: ret <16 x i8> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshrq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <16 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <16 x i8> [[VSHR_N]]
+//
int8x16_t test_vshrq_n_s8(int8x16_t a) {
return vshrq_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshrq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHR_N]]
+//
int16x8_t test_vshrq_n_s16(int16x8_t a) {
return vshrq_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <4 x i32> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshrq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSHR_N]]
+//
int32x4_t test_vshrq_n_s32(int32x4_t a) {
return vshrq_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK: ret <2 x i64> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshrq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3)
+// CHECK-NEXT: ret <2 x i64> [[VSHR_N]]
+//
int64x2_t test_vshrq_n_s64(int64x2_t a) {
return vshrq_n_s64(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_u8(
-// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 3)
-// CHECK: ret <8 x i8> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshr_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <8 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <8 x i8> [[VSHR_N]]
+//
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
return vshr_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <4 x i16> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshr_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <4 x i16> [[VSHR_N]]
+//
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
return vshr_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vshr_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <2 x i32> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshr_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSHR_N]]
+//
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
return vshr_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_u8(
-// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 3)
-// CHECK: ret <16 x i8> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshrq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <16 x i8> [[A]], splat (i8 3)
+// CHECK-NEXT: ret <16 x i8> [[VSHR_N]]
+//
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
return vshrq_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshrq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHR_N]]
+//
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
return vshrq_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK: ret <4 x i32> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshrq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSHR_N]]
+//
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
return vshrq_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vshrq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK: ret <2 x i64> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshrq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3)
+// CHECK-NEXT: ret <2 x i64> [[VSHR_N]]
+//
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
return vshrq_n_u64(a, 3);
}
-// CHECK-LABEL: @test_vsra_n_s8(
-// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 3)
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsra_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <8 x i8> [[B]], splat (i8 3)
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
return vsra_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsra_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3)
-// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i16> [[TMP4]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsra_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
return vsra_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsra_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3)
-// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i32> [[TMP4]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsra_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
return vsra_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_s8(
-// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 3)
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsraq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <16 x i8> [[B]], splat (i8 3)
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
return vsraq_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3)
-// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <8 x i16> [[TMP4]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsraq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
return vsraq_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3)
-// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i32> [[TMP4]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsraq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
return vsraq_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3)
-// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i64> [[TMP4]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsraq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
+//
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
return vsraq_n_s64(a, b, 3);
}
-// CHECK-LABEL: @test_vsra_n_u8(
-// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 3)
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsra_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <8 x i8> [[B]], splat (i8 3)
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
return vsra_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsra_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3)
-// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i16> [[TMP4]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsra_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
return vsra_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsra_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 3)
-// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i32> [[TMP4]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsra_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
return vsra_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_u8(
-// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 3)
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsraq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <16 x i8> [[B]], splat (i8 3)
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsraq_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3)
-// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <8 x i16> [[TMP4]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsraq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsraq_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3)
-// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i32> [[TMP4]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsraq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsraq_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsraq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3)
-// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i64> [[TMP4]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsraq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
+//
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsraq_n_u64(a, b, 3);
}
-// CHECK-LABEL: @test_vrshr_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3))
-// CHECK: ret <8 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshr_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 -3))
+// CHECK-NEXT: ret <8 x i8> [[VRSHR_N]]
+//
int8x8_t test_vrshr_n_s8(int8x8_t a) {
return vrshr_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vrshr_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
-// CHECK: ret <4 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshr_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+// CHECK-NEXT: ret <4 x i16> [[VRSHR_N1]]
+//
int16x4_t test_vrshr_n_s16(int16x4_t a) {
return vrshr_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vrshr_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
-// CHECK: ret <2 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshr_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+// CHECK-NEXT: ret <2 x i32> [[VRSHR_N1]]
+//
int32x2_t test_vrshr_n_s32(int32x2_t a) {
return vrshr_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3))
-// CHECK: ret <16 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshrq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 -3))
+// CHECK-NEXT: ret <16 x i8> [[VRSHR_N]]
+//
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
return vrshrq_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
-// CHECK: ret <8 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshrq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+// CHECK-NEXT: ret <8 x i16> [[VRSHR_N1]]
+//
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
return vrshrq_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
-// CHECK: ret <4 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshrq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+// CHECK-NEXT: ret <4 x i32> [[VRSHR_N1]]
+//
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
return vrshrq_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
-// CHECK: ret <2 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrshrq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+// CHECK-NEXT: ret <2 x i64> [[VRSHR_N1]]
+//
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
return vrshrq_n_s64(a, 3);
}
-// CHECK-LABEL: @test_vrshr_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3))
-// CHECK: ret <8 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshr_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 -3))
+// CHECK-NEXT: ret <8 x i8> [[VRSHR_N]]
+//
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
return vrshr_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vrshr_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
-// CHECK: ret <4 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshr_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+// CHECK-NEXT: ret <4 x i16> [[VRSHR_N1]]
+//
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
return vrshr_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vrshr_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
-// CHECK: ret <2 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshr_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+// CHECK-NEXT: ret <2 x i32> [[VRSHR_N1]]
+//
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
return vrshr_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3))
-// CHECK: ret <16 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshrq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 -3))
+// CHECK-NEXT: ret <16 x i8> [[VRSHR_N]]
+//
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
return vrshrq_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
-// CHECK: ret <8 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshrq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+// CHECK-NEXT: ret <8 x i16> [[VRSHR_N1]]
+//
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
return vrshrq_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
-// CHECK: ret <4 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshrq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+// CHECK-NEXT: ret <4 x i32> [[VRSHR_N1]]
+//
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
return vrshrq_n_u32(a, 3);
}
-// CHECK-LABEL: @test_vrshrq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
-// CHECK: ret <2 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrshrq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+// CHECK-NEXT: ret <2 x i64> [[VRSHR_N1]]
+//
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
return vrshrq_n_u64(a, 3);
}
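// For reference: vrshr_n rounds before shifting, and the IR models it as a
// rounding shift left (srshl/urshl) by a negative splat amount, so
// splat (i16 -3) above is a rounding shift right by 3. A scalar sketch of one
// signed 16-bit lane (helper name is illustrative; assumes the usual
// arithmetic right shift for signed integers on AArch64 targets):
static inline int16_t srshr16_model(int16_t x, int n) {
  // add the rounding constant 2^(n-1), widened so the add cannot overflow,
  // then shift right by n
  return (int16_t)(((int32_t)x + (1 << (n - 1))) >> n);
}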
-// CHECK-LABEL: @test_vrsra_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3))
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrsra_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[B]], <8 x i8> splat (i8 -3))
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VRSHR_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
return vrsra_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vrsra_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <4 x i16> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrsra_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <4 x i16> [[TMP3]]
+//
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
return vrsra_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vrsra_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <2 x i32> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrsra_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
return vrsra_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3))
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrsraq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[B]], <16 x i8> splat (i8 -3))
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VRSHR_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
return vrsraq_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <8 x i16> [[TMP3]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrsraq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+//
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
return vrsraq_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrsraq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
return vrsraq_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <2 x i64> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrsraq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
return vrsraq_n_s64(a, b, 3);
}
-// CHECK-LABEL: @test_vrsra_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3))
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrsra_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[B]], <8 x i8> splat (i8 -3))
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VRSHR_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
return vrsra_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vrsra_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <4 x i16> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrsra_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <4 x i16> [[TMP3]]
+//
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
return vrsra_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vrsra_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <2 x i32> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrsra_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
return vrsra_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3))
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrsraq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[B]], <16 x i8> splat (i8 -3))
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VRSHR_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
return vrsraq_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <8 x i16> [[TMP3]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrsraq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+//
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
return vrsraq_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrsraq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
return vrsraq_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vrsraq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <2 x i64> [[TMP3]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vrsraq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
return vrsraq_n_u64(a, b, 3);
}
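// For reference: vrsra_n is vrshr_n followed by an accumulate, matching the
// srshl/urshl call plus the trailing add in the checks above. One signed
// 16-bit lane, as an illustrative sketch (helper name is not part of the
// test; same rounding and shift assumptions as srshr16_model above):
static inline int16_t srsra16_model(int16_t acc, int16_t x, int n) {
  // round, shift, then accumulate into the first operand
  return (int16_t)(acc + (((int32_t)x + (1 << (n - 1))) >> n));
}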
-// CHECK-LABEL: @test_vsri_n_s8(
-// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsri_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSRI_N]]
+//
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
return vsri_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
-// CHECK: ret <4 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsri_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i16> [[VSRI_N2]]
+//
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
return vsri_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
-// CHECK: ret <2 x i32> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsri_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSRI_N2]]
+//
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
return vsri_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_s8(
-// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsriq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSRI_N]]
+//
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
return vsriq_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
-// CHECK: ret <8 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsriq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VSRI_N2]]
+//
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
return vsriq_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
-// CHECK: ret <4 x i32> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsriq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSRI_N2]]
+//
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
return vsriq_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
-// CHECK: ret <2 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsriq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i64> [[VSRI_N2]]
+//
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
return vsriq_n_s64(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_u8(
-// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsri_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSRI_N]]
+//
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
return vsri_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
-// CHECK: ret <4 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsri_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i16> [[VSRI_N2]]
+//
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
return vsri_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
-// CHECK: ret <2 x i32> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsri_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSRI_N2]]
+//
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
return vsri_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_u8(
-// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsriq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSRI_N]]
+//
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsriq_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
-// CHECK: ret <8 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsriq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VSRI_N2]]
+//
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsriq_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
-// CHECK: ret <4 x i32> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsriq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSRI_N2]]
+//
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsriq_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
-// CHECK: ret <2 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsriq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i64> [[VSRI_N2]]
+//
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsriq_n_u64(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_p8(
-// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsri_n_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSRI_N]]
+//
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
return vsri_n_p8(a, b, 3);
}
-// CHECK-LABEL: @test_vsri_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
-// CHECK: ret <4 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsri_n_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
+// CHECK-NEXT: ret <4 x i16> [[VSRI_N2]]
+//
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
return vsri_n_p16(a, b, 15);
}
-// CHECK-LABEL: @test_vsriq_n_p8(
-// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSRI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsriq_n_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSRI_N]]
+//
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
return vsriq_n_p8(a, b, 3);
}
-// CHECK-LABEL: @test_vsriq_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
-// CHECK: ret <8 x i16> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsriq_n_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
+// CHECK-NEXT: ret <8 x i16> [[VSRI_N2]]
+//
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
return vsriq_n_p16(a, b, 15);
}
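// For reference: vsri_n shifts the second operand right by n and inserts the
// result into the first operand, preserving the top n bits of each lane of
// the first operand; hence the single llvm.aarch64.neon.vsri call with an i32
// immediate. A scalar sketch of one 16-bit lane (helper name is illustrative;
// n is 1..16 for 16-bit elements):
static inline uint16_t sri16_model(uint16_t dst, uint16_t src, int n) {
  uint16_t keep = (uint16_t)(0xFFFFu << (16 - n)); // top n bits of dst survive
  return (uint16_t)((dst & keep) | (uint16_t)(src >> n));
}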
-// CHECK-LABEL: @test_vsli_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsli_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
return vsli_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsli_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
return vsli_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsli_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
return vsli_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsliq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
return vsliq_n_s8(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsliq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
return vsliq_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsliq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
return vsliq_n_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsliq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
return vsliq_n_s64(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsli_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
return vsli_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsli_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
return vsli_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsli_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
return vsli_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsliq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsliq_n_u8(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsliq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsliq_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsliq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsliq_n_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsliq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsliq_n_u64(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsli_n_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
return vsli_n_p8(a, b, 3);
}
-// CHECK-LABEL: @test_vsli_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsli_n_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
return vsli_n_p16(a, b, 15);
}
-// CHECK-LABEL: @test_vsliq_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsliq_n_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
return vsliq_n_p8(a, b, 3);
}
-// CHECK-LABEL: @test_vsliq_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsliq_n_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
return vsliq_n_p16(a, b, 15);
}
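
(As a reading aid for the vsli checks above, not part of the patch: a minimal scalar sketch of one lane of the @llvm.aarch64.neon.vsli calls. The helper name sli_lane_u8 is hypothetical; SLI keeps the low n bits of the first operand and fills the rest from the second operand shifted left.)

#include <stdint.h>

/* One lane of vsli_n_u8(a, b, n): the low n bits of a are preserved
   and the remaining bits come from b shifted left by n. */
static uint8_t sli_lane_u8(uint8_t a, uint8_t b, int n) {
  uint8_t mask = (uint8_t)((1u << n) - 1u); /* bits of a to keep */
  return (uint8_t)((uint8_t)(b << n) | (a & mask));
}
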
-// CHECK-LABEL: @test_vqshlu_n_s8(
-// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 3))
-// CHECK: ret <8 x i8> [[VQSHLU_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshlu_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 3))
+// CHECK-NEXT: ret <8 x i8> [[VQSHLU_N]]
+//
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
return vqshlu_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vqshlu_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3))
-// CHECK: ret <4 x i16> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshlu_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3))
+// CHECK-NEXT: ret <4 x i16> [[VQSHLU_N1]]
+//
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
return vqshlu_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqshlu_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3))
-// CHECK: ret <2 x i32> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshlu_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3))
+// CHECK-NEXT: ret <2 x i32> [[VQSHLU_N1]]
+//
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
return vqshlu_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vqshluq_n_s8(
-// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 3))
-// CHECK: ret <16 x i8> [[VQSHLU_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshluq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 3))
+// CHECK-NEXT: ret <16 x i8> [[VQSHLU_N]]
+//
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
return vqshluq_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vqshluq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3))
-// CHECK: ret <8 x i16> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshluq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3))
+// CHECK-NEXT: ret <8 x i16> [[VQSHLU_N1]]
+//
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
return vqshluq_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqshluq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3))
-// CHECK: ret <4 x i32> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshluq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3))
+// CHECK-NEXT: ret <4 x i32> [[VQSHLU_N1]]
+//
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
return vqshluq_n_s32(a, 3);
}
-// CHECK-LABEL: @test_vqshluq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3))
-// CHECK: ret <2 x i64> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqshluq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3))
+// CHECK-NEXT: ret <2 x i64> [[VQSHLU_N1]]
+//
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
return vqshluq_n_s64(a, 3);
}
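
(Likewise for the vqshlu checks above: the IR only calls @llvm.aarch64.neon.sqshlu, so this is a hedged scalar model of that intrinsic's documented semantics, signed saturating shift left with an unsigned result; the helper name is hypothetical.)

#include <stdint.h>

/* One lane of vqshlu_n_s8(a, n): negative inputs clamp to 0,
   overflow clamps to UINT8_MAX. */
static uint8_t sqshlu_lane_s8(int8_t a, int n) {
  if (a < 0)
    return 0;
  int32_t shifted = (int32_t)a << n;
  return shifted > 255 ? 255u : (uint8_t)shifted;
}
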
-// CHECK-LABEL: @test_vshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSHRN_N]]
+//
int8x8_t test_vshrn_n_s16(int16x8_t a) {
return vshrn_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSHRN_N]]
+//
int16x4_t test_vshrn_n_s32(int32x4_t a) {
return vshrn_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSHRN_N]]
+//
int32x2_t test_vshrn_n_s64(int64x2_t a) {
return vshrn_n_s64(a, 19);
}
-// CHECK-LABEL: @test_vshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSHRN_N]]
+//
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
return vshrn_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSHRN_N]]
+//
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
return vshrn_n_u32(a, 9);
}
-// CHECK-LABEL: @test_vshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSHRN_N]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSHRN_N]]
+//
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
return vshrn_n_u64(a, 19);
}
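
(The vshrn checks above spell the lowering out directly: an ashr (lshr for the unsigned variants) followed by a trunc, i.e. per-lane "shift right, then narrow". A minimal one-lane sketch, helper name hypothetical:)

#include <stdint.h>

/* One lane of vshrn_n_s16(a, n): arithmetic shift right, then
   truncate to the narrow type -- the ashr + trunc pair above. */
static int8_t shrn_lane_s16(int16_t a, int n) {
  return (int8_t)(a >> n);
}
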
-// CHECK-LABEL: @test_vshrn_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshrn_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
return vshrn_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vshrn_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshrn_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
return vshrn_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vshrn_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshrn_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
return vshrn_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vshrn_high_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vshrn_high_n_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
return vshrn_high_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vshrn_high_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshrn_high_n_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
return vshrn_high_n_u32(a, b, 9);
}
-// CHECK-LABEL: @test_vshrn_high_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshrn_high_n_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
return vshrn_high_n_u64(a, b, 19);
}
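
(For the _high variants above, the shufflevector with indices 0..15 is a concatenation: the low half of the result is the first argument, the high half is the narrowed second argument. A sketch with a hypothetical helper:)

#include <stdint.h>

/* vshrn_high_n_s16(a, b, n): keep a as the low half, append the
   narrowed b as the high half -- the shufflevector in the checks. */
static void shrn_high_s16(int8_t out[16], const int8_t a[8],
                          const int16_t b[8], int n) {
  for (int i = 0; i < 8; ++i)
    out[i] = a[i];
  for (int i = 0; i < 8; ++i)
    out[8 + i] = (int8_t)(b[i] >> n);
}
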
-// CHECK-LABEL: @test_vqshrun_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrun_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQSHRUN_N1]]
+//
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
return vqshrun_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqshrun_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrun_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQSHRUN_N1]]
+//
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
return vqshrun_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vqshrun_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrun_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQSHRUN_N1]]
+//
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
return vqshrun_n_s64(a, 19);
}
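
(The vqshrun checks above call @llvm.aarch64.neon.sqshrun; per that intrinsic's name and documentation, it is a signed shift right that saturates into the unsigned narrow range. A hedged one-lane model, helper name hypothetical:)

#include <stdint.h>

/* One lane of vqshrun_n_s16(a, n): shift right, then clamp the
   signed result into [0, UINT8_MAX]. */
static uint8_t sqshrun_lane_s16(int16_t a, int n) {
  int32_t v = a >> n;
  if (v < 0)
    return 0;
  return v > 255 ? 255u : (uint8_t)v;
}
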
-// CHECK-LABEL: @test_vqshrun_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshrun_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRUN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
return vqshrun_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqshrun_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshrun_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRUN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
return vqshrun_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vqshrun_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshrun_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRUN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
return vqshrun_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vrshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VRSHRN_N1]]
+//
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
return vrshrn_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vrshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]]
+//
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
return vrshrn_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vrshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VRSHRN_N1]]
+//
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
return vrshrn_n_s64(a, 19);
}
-// CHECK-LABEL: @test_vrshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VRSHRN_N1]]
+//
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
return vrshrn_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vrshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]]
+//
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
return vrshrn_n_u32(a, 9);
}
-// CHECK-LABEL: @test_vrshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VRSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VRSHRN_N1]]
+//
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
return vrshrn_n_u64(a, 19);
}
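
(The vrshrn checks above use @llvm.aarch64.neon.rshrn, the rounding counterpart of the plain vshrn lowering: add the rounding constant, shift, truncate. One-lane sketch, helper name hypothetical:)

#include <stdint.h>

/* One lane of vrshrn_n_s16(a, n): add 1 << (n - 1) to round to
   nearest, shift right, then truncate to the narrow type. */
static int8_t rshrn_lane_s16(int16_t a, int n) {
  return (int8_t)(((int32_t)a + (1 << (n - 1))) >> n);
}
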
-// CHECK-LABEL: @test_vrshrn_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshrn_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
return vrshrn_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vrshrn_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshrn_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
return vrshrn_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vrshrn_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshrn_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
return vrshrn_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vrshrn_high_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrshrn_high_n_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
return vrshrn_high_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vrshrn_high_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrshrn_high_n_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
return vrshrn_high_n_u32(a, b, 9);
}
-// CHECK-LABEL: @test_vrshrn_high_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrshrn_high_n_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
return vrshrn_high_n_u64(a, b, 19);
}
-// CHECK-LABEL: @test_vqrshrun_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrun_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRUN_N1]]
+//
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
return vqrshrun_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqrshrun_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrun_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRUN_N1]]
+//
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
return vqrshrun_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vqrshrun_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrun_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRUN_N1]]
+//
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
return vqrshrun_n_s64(a, 19);
}
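
(The vqrshrun checks above combine the two previous behaviors: @llvm.aarch64.neon.sqrshrun is the rounding variant of sqshrun. A hedged one-lane model, helper name hypothetical:)

#include <stdint.h>

/* One lane of vqrshrun_n_s16(a, n): rounding shift right, then
   clamp into the unsigned narrow range [0, UINT8_MAX]. */
static uint8_t sqrshrun_lane_s16(int16_t a, int n) {
  int32_t v = ((int32_t)a + (1 << (n - 1))) >> n;
  if (v < 0)
    return 0;
  return v > 255 ? 255u : (uint8_t)v;
}
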
-// CHECK-LABEL: @test_vqrshrun_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrun_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRUN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQRSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
return vqrshrun_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqrshrun_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrun_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRUN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQRSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
return vqrshrun_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vqrshrun_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrun_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRUN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQRSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
return vqrshrun_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vqshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]]
+//
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
return vqshrn_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]]
+//
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
return vqshrn_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vqshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]]
+//
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
return vqshrn_n_s64(a, 19);
}
-// CHECK-LABEL: @test_vqshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]]
+//
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
return vqshrn_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vqshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]]
+//
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
return vqshrn_n_u32(a, 9);
}
-// CHECK-LABEL: @test_vqshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]]
+//
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
return vqshrn_n_u64(a, 19);
}
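
(The vqshrn checks above select @llvm.aarch64.neon.sqshrn or uqshrn by signedness; both shift right and saturate into the narrow type's own range. Hedged one-lane models, helper names hypothetical:)

#include <stdint.h>

/* One lane of vqshrn_n_s16(a, n): shift right, clamp to
   [INT8_MIN, INT8_MAX]. */
static int8_t sqshrn_lane_s16(int16_t a, int n) {
  int32_t v = a >> n;
  if (v < -128)
    return (int8_t)-128;
  return v > 127 ? (int8_t)127 : (int8_t)v;
}

/* One lane of vqshrn_n_u16(a, n): shift right, clamp to
   [0, UINT8_MAX]. */
static uint8_t uqshrn_lane_u16(uint16_t a, int n) {
  uint32_t v = (uint32_t)a >> n;
  return v > 255u ? 255u : (uint8_t)v;
}
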
-// CHECK-LABEL: @test_vqshrn_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshrn_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
return vqshrn_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqshrn_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshrn_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
return vqshrn_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vqshrn_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshrn_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
return vqshrn_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vqshrn_high_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshrn_high_n_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
return vqshrn_high_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vqshrn_high_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshrn_high_n_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
return vqshrn_high_n_u32(a, b, 9);
}
-// CHECK-LABEL: @test_vqshrn_high_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshrn_high_n_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
return vqshrn_high_n_u64(a, b, 19);
}
-// CHECK-LABEL: @test_vqrshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRN_N1]]
+//
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
return vqrshrn_n_s16(a, 3);
}
-// CHECK-LABEL: @test_vqrshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRN_N1]]
+//
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
return vqrshrn_n_s32(a, 9);
}
-// CHECK-LABEL: @test_vqrshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRN_N1]]
+//
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
return vqrshrn_n_s64(a, 19);
}
-// CHECK-LABEL: @test_vqrshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRN_N1]]
+//
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
return vqrshrn_n_u16(a, 3);
}
-// CHECK-LABEL: @test_vqrshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRN_N1]]
+//
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
return vqrshrn_n_u32(a, 9);
}
-// CHECK-LABEL: @test_vqrshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRN_N1]]
+//
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
return vqrshrn_n_u64(a, 19);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrn_high_n_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
return vqrshrn_high_n_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrn_high_n_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
return vqrshrn_high_n_s32(a, b, 9);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrn_high_n_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
return vqrshrn_high_n_s64(a, b, 19);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrn_high_n_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
return vqrshrn_high_n_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrn_high_n_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
return vqrshrn_high_n_u32(a, b, 9);
}
-// CHECK-LABEL: @test_vqrshrn_high_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrn_high_n_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
return vqrshrn_high_n_u64(a, b, 19);
}
-// CHECK-LABEL: @test_vshll_n_s8(
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
int16x8_t test_vshll_n_s8(int8x8_t a) {
return vshll_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshll_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
int32x4_t test_vshll_n_s16(int16x4_t a) {
return vshll_n_s16(a, 9);
}
-// CHECK-LABEL: @test_vshll_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
int64x2_t test_vshll_n_s32(int32x2_t a) {
return vshll_n_s32(a, 19);
}
-// CHECK-LABEL: @test_vshll_n_u8(
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
return vshll_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshll_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
return vshll_n_u16(a, 9);
}
-// CHECK-LABEL: @test_vshll_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
return vshll_n_u32(a, 19);
}
-// CHECK-LABEL: @test_vshll_high_n_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
return vshll_high_n_s8(a, 3);
}
-// CHECK-LABEL: @test_vshll_high_n_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
return vshll_high_n_s16(a, 9);
}
-// CHECK-LABEL: @test_vshll_high_n_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
return vshll_high_n_s32(a, 19);
}
-// CHECK-LABEL: @test_vshll_high_n_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
return vshll_high_n_u8(a, 3);
}
-// CHECK-LABEL: @test_vshll_high_n_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
return vshll_high_n_u16(a, 9);
}
-// CHECK-LABEL: @test_vshll_high_n_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
return vshll_high_n_u32(a, 19);
}
-// CHECK-LABEL: @test_vmovl_s8(
-// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
int16x8_t test_vmovl_s8(int8x8_t a) {
return vmovl_s8(a);
}
-// CHECK-LABEL: @test_vmovl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
int32x4_t test_vmovl_s16(int16x4_t a) {
return vmovl_s16(a);
}
-// CHECK-LABEL: @test_vmovl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
int64x2_t test_vmovl_s32(int32x2_t a) {
return vmovl_s32(a);
}
-// CHECK-LABEL: @test_vmovl_u8(
-// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
uint16x8_t test_vmovl_u8(uint8x8_t a) {
return vmovl_u8(a);
}
-// CHECK-LABEL: @test_vmovl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
uint32x4_t test_vmovl_u16(uint16x4_t a) {
return vmovl_u16(a);
}
-// CHECK-LABEL: @test_vmovl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
uint64x2_t test_vmovl_u32(uint32x2_t a) {
return vmovl_u32(a);
}
-// CHECK-LABEL: @test_vmovl_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vmovl_high_s8(int8x16_t a) {
return vmovl_high_s8(a);
}
-// CHECK-LABEL: @test_vmovl_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovl_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vmovl_high_s16(int16x8_t a) {
return vmovl_high_s16(a);
}
-// CHECK-LABEL: @test_vmovl_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vmovl_high_s32(int32x4_t a) {
return vmovl_high_s32(a);
}
-// CHECK-LABEL: @test_vmovl_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
return vmovl_high_u8(a);
}
-// CHECK-LABEL: @test_vmovl_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovl_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
return vmovl_high_u16(a);
}
-// CHECK-LABEL: @test_vmovl_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
return vmovl_high_u32(a);
}
-// CHECK-LABEL: @test_vcvt_n_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x float> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_n_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <2 x float> [[VCVT_N1]]
+//
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
return vcvt_n_f32_s32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <4 x float> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvtq_n_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <4 x float> [[VCVT_N1]]
+//
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
return vcvtq_n_f32_s32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
-// CHECK: ret <2 x double> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvtq_n_f64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// CHECK-NEXT: ret <2 x double> [[VCVT_N1]]
+//
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
return vcvtq_n_f64_s64(a, 50);
}
-// CHECK-LABEL: @test_vcvt_n_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x float> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_n_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <2 x float> [[VCVT_N1]]
+//
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
return vcvt_n_f32_u32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <4 x float> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvtq_n_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <4 x float> [[VCVT_N1]]
+//
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
return vcvtq_n_f32_u32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
-// CHECK: ret <2 x double> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvtq_n_f64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// CHECK-NEXT: ret <2 x double> [[VCVT_N1]]
+//
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
return vcvtq_n_f64_u64(a, 50);
}
-// CHECK-LABEL: @test_vcvt_n_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_n_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <2 x i32> [[VCVT_N1]]
+//
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
return vcvt_n_s32_f32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
-// CHECK: ret <4 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_n_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <4 x i32> [[VCVT_N1]]
+//
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
return vcvtq_n_s32_f32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
-// CHECK: ret <2 x i64> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_n_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// CHECK-NEXT: ret <2 x i64> [[VCVT_N1]]
+//
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
return vcvtq_n_s64_f64(a, 50);
}
-// CHECK-LABEL: @test_vcvt_n_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_n_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <2 x i32> [[VCVT_N1]]
+//
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
return vcvt_n_u32_f32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
-// CHECK: ret <4 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_n_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// CHECK-NEXT: ret <4 x i32> [[VCVT_N1]]
+//
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
return vcvtq_n_u32_f32(a, 31);
}
-// CHECK-LABEL: @test_vcvtq_n_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
-// CHECK: ret <2 x i64> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_n_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// CHECK-NEXT: ret <2 x i64> [[VCVT_N1]]
+//
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
return vcvtq_n_u64_f64(a, 50);
}
-// CHECK-LABEL: @test_vaddl_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
return vaddl_s8(a, b);
}
-// CHECK-LABEL: @test_vaddl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
return vaddl_s16(a, b);
}
-// CHECK-LABEL: @test_vaddl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
return vaddl_s32(a, b);
}
-// CHECK-LABEL: @test_vaddl_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
return vaddl_u8(a, b);
}
-// CHECK-LABEL: @test_vaddl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
return vaddl_u16(a, b);
}
-// CHECK-LABEL: @test_vaddl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
return vaddl_u32(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_s8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
return vaddl_high_s8(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_s16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
return vaddl_high_s16(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_s32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
return vaddl_high_s32(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_u8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
return vaddl_high_u8(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_u16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
return vaddl_high_u16(a, b);
}
-// CHECK-LABEL: @test_vaddl_high_u32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
return vaddl_high_u32(a, b);
}
-// CHECK-LABEL: @test_vaddw_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
return vaddw_s8(a, b);
}
-// CHECK-LABEL: @test_vaddw_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
return vaddw_s16(a, b);
}
-// CHECK-LABEL: @test_vaddw_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
return vaddw_s32(a, b);
}
-// CHECK-LABEL: @test_vaddw_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
return vaddw_u8(a, b);
}
-// CHECK-LABEL: @test_vaddw_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
return vaddw_u16(a, b);
}
-// CHECK-LABEL: @test_vaddw_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
return vaddw_u32(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_s8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_high_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
return vaddw_high_s8(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_s16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
return vaddw_high_s16(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_s32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
return vaddw_high_s32(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_u8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_high_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
return vaddw_high_u8(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_u16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_high_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
return vaddw_high_u16(a, b);
}
-// CHECK-LABEL: @test_vaddw_high_u32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_high_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
return vaddw_high_u32(a, b);
}
-// CHECK-LABEL: @test_vsubl_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
return vsubl_s8(a, b);
}
-// CHECK-LABEL: @test_vsubl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
return vsubl_s16(a, b);
}
-// CHECK-LABEL: @test_vsubl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
return vsubl_s32(a, b);
}
-// CHECK-LABEL: @test_vsubl_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
return vsubl_u8(a, b);
}
-// CHECK-LABEL: @test_vsubl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
return vsubl_u16(a, b);
}
-// CHECK-LABEL: @test_vsubl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
return vsubl_u32(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_s8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
return vsubl_high_s8(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_s16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
return vsubl_high_s16(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_s32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
return vsubl_high_s32(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_u8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
return vsubl_high_u8(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_u16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
return vsubl_high_u16(a, b);
}
-// CHECK-LABEL: @test_vsubl_high_u32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
return vsubl_high_u32(a, b);
}
-// CHECK-LABEL: @test_vsubw_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
return vsubw_s8(a, b);
}
-// CHECK-LABEL: @test_vsubw_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
return vsubw_s16(a, b);
}
-// CHECK-LABEL: @test_vsubw_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
return vsubw_s32(a, b);
}
-// CHECK-LABEL: @test_vsubw_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
return vsubw_u8(a, b);
}
-// CHECK-LABEL: @test_vsubw_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
return vsubw_u16(a, b);
}
-// CHECK-LABEL: @test_vsubw_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
return vsubw_u32(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_s8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
return vsubw_high_s8(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_s16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
return vsubw_high_s16(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_s32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
return vsubw_high_s32(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_u8(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
return vsubw_high_u8(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_u16(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
return vsubw_high_u16(a, b);
}
-// CHECK-LABEL: @test_vsubw_high_u32(
-// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
return vsubw_high_u32(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vaddhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VADDHN2_I]]
+//
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
return vaddhn_s16(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vaddhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VADDHN2_I]]
+//
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
return vaddhn_s32(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vaddhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VADDHN2_I]]
+//
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
return vaddhn_s64(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vaddhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VADDHN2_I]]
+//
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
return vaddhn_u16(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vaddhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VADDHN2_I]]
+//
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
return vaddhn_u32(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VADDHN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vaddhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VADDHN2_I]]
+//
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
return vaddhn_u64(a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vaddhn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
return vaddhn_high_s16(r, a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddhn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
return vaddhn_high_s32(r, a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddhn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
return vaddhn_high_s64(r, a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vaddhn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vaddhn_high_u16(r, a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vaddhn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vaddhn_high_u32(r, a, b);
}
-// CHECK-LABEL: @test_vaddhn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vaddhn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vaddhn_high_u64(r, a, b);
}
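A note for cross-checking the updated CHECK lines above: the add/lshr/trunc sequence is the complete definition of vaddhn (add, keep the high half of each lane, narrow). A minimal scalar sketch of one s32 lane, using an illustrative helper name that is not part of arm_neon.h, with unsigned arithmetic to mirror the wrapping add and logical shift in the IR:

#include <stdint.h>

static inline int16_t addhn_lane_s32(int32_t a, int32_t b) {
  uint32_t sum = (uint32_t)a + (uint32_t)b; /* add <4 x i32>, wrapping */
  return (int16_t)(sum >> 16);              /* lshr splat (i32 16), then trunc */
}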
-// CHECK-LABEL: @test_vraddhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vraddhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRADDHN_V2_I]]
+//
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
return vraddhn_s16(a, b);
}
-// CHECK-LABEL: @test_vraddhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vraddhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
return vraddhn_s32(a, b);
}
-// CHECK-LABEL: @test_vraddhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vraddhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
return vraddhn_s64(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vraddhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRADDHN_V2_I]]
+//
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
return vraddhn_u16(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vraddhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
return vraddhn_u32(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vraddhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
return vraddhn_u64(a, b);
}
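Unlike the vaddhn tests, the vraddhn checks above call the @llvm.aarch64.neon.raddhn.* intrinsics directly, so the rounding behaviour is not visible in the IR. Per the ACLE definition of RADDHN, a rounding bias of 1 << (half_bits - 1) is added before the shift; one s32 lane sketched with a hypothetical helper:

#include <stdint.h>

static inline int16_t raddhn_lane_s32(int32_t a, int32_t b) {
  uint32_t sum = (uint32_t)a + (uint32_t)b + (1u << 15); /* rounding bias */
  return (int16_t)(sum >> 16);                           /* keep the high half */
}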
-// CHECK-LABEL: @test_vraddhn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vraddhn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
return vraddhn_high_s16(r, a, b);
}
-// CHECK-LABEL: @test_vraddhn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vraddhn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
return vraddhn_high_s32(r, a, b);
}
-// CHECK-LABEL: @test_vraddhn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vraddhn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
return vraddhn_high_s64(r, a, b);
}
-// CHECK-LABEL: @test_vraddhn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vraddhn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vraddhn_high_u16(r, a, b);
}
-// CHECK-LABEL: @test_vraddhn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vraddhn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vraddhn_high_u32(r, a, b);
}
-// CHECK-LABEL: @test_vraddhn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vraddhn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vraddhn_high_u64(r, a, b);
}
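The _high variants above differ from the plain forms only in the final shufflevector, whose <i32 0 ... 15> mask is IR shorthand for concatenating the two operands. Conceptually (a sketch of the ACLE equivalence, not code from this patch):

#include <arm_neon.h>

int8x16_t raddhn_high_via_combine(int8x8_t r, int16x8_t a, int16x8_t b) {
  /* produces the same lanes as vraddhn_high_s16(r, a, b) */
  return vcombine_s8(r, vraddhn_s16(a, b));
}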
-// CHECK-LABEL: @test_vsubhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsubhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSUBHN2_I]]
+//
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
return vsubhn_s16(a, b);
}
-// CHECK-LABEL: @test_vsubhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsubhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSUBHN2_I]]
+//
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
return vsubhn_s32(a, b);
}
-// CHECK-LABEL: @test_vsubhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsubhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSUBHN2_I]]
+//
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
return vsubhn_s64(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsubhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSUBHN2_I]]
+//
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
return vsubhn_u16(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsubhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSUBHN2_I]]
+//
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
return vsubhn_u32(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSUBHN2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsubhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSUBHN2_I]]
+//
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
return vsubhn_u64(a, b);
}
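The vsubhn checks mirror the vaddhn ones with sub in place of add, and the vrsubhn tests further down mirror vraddhn, adding the same 1 << (half_bits - 1) rounding bias to the difference before the shift. One s32 lane of vsubhn, sketched with an illustrative helper:

#include <stdint.h>

static inline int16_t subhn_lane_s32(int32_t a, int32_t b) {
  uint32_t diff = (uint32_t)a - (uint32_t)b; /* sub <4 x i32>, wrapping */
  return (int16_t)(diff >> 16);              /* lshr, then trunc, as in the IR */
}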
-// CHECK-LABEL: @test_vsubhn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsubhn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
return vsubhn_high_s16(r, a, b);
}
-// CHECK-LABEL: @test_vsubhn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubhn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
return vsubhn_high_s32(r, a, b);
}
-// CHECK-LABEL: @test_vsubhn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubhn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
return vsubhn_high_s64(r, a, b);
}
-// CHECK-LABEL: @test_vsubhn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsubhn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vsubhn_high_u16(r, a, b);
}
-// CHECK-LABEL: @test_vsubhn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsubhn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vsubhn_high_u32(r, a, b);
}
-// CHECK-LABEL: @test_vsubhn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsubhn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vsubhn_high_u64(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrsubhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRSUBHN_V2_I]]
+//
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
return vrsubhn_s16(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrsubhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
return vrsubhn_s32(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrsubhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
return vrsubhn_s64(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrsubhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRSUBHN_V2_I]]
+//
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
return vrsubhn_u16(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrsubhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
return vrsubhn_u32(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrsubhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
return vrsubhn_u64(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrsubhn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
return vrsubhn_high_s16(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrsubhn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
return vrsubhn_high_s32(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrsubhn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
return vrsubhn_high_s64(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrsubhn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[R:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> [[R]], <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I_I]]
+//
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vrsubhn_high_u16(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrsubhn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[R:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I_I]]
+//
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vrsubhn_high_u32(r, a, b);
}
-// CHECK-LABEL: @test_vrsubhn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrsubhn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> [[R]], <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I_I]]
+//
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vrsubhn_high_u64(r, a, b);
}
-// CHECK-LABEL: @test_vabdl_s8(
-// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I_I]]
+//
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
return vabdl_s8(a, b);
}
-// CHECK-LABEL: @test_vabdl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I_I]]
+//
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
return vabdl_s16(a, b);
}
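
The vabdl checks reduce to a widening absolute difference: the sabd/uabd intrinsic call followed by a zext, with the new bitcast pairs only laundering the operand types. A minimal one-lane scalar sketch, assuming ACLE semantics (helper name hypothetical):

  #include <stdint.h>

  /* |a - b| computed at full width always fits in 16 unsigned bits,
     so the zero-extension in the IR is value-preserving even for
     signed inputs. */
  static inline int32_t abdl_s16_lane(int16_t a, int16_t b) {
    int32_t d = (int32_t)a - (int32_t)b;
    return (int32_t)(uint16_t)(d < 0 ? -d : d);
  }
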
-// CHECK-LABEL: @test_vabdl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabdl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I_I]]
+//
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
return vabdl_s32(a, b);
}
-// CHECK-LABEL: @test_vabdl_u8(
-// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I_I]]
+//
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
return vabdl_u8(a, b);
}
-// CHECK-LABEL: @test_vabdl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I_I]]
+//
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
return vabdl_u16(a, b);
}
-// CHECK-LABEL: @test_vabdl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabdl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I_I]]
+//
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
return vabdl_u32(a, b);
}
-// CHECK-LABEL: @test_vabal_s8(
-// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabal_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vabal_s8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vabal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vabal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u8(
-// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabal_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vabal_u8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabal_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vabal_u16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabal_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vabal_u32(a, b, c);
}
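
vabal is the accumulating form of the same pattern: the plain IR add folds the widened absolute difference into the first operand. A hypothetical one-lane model (not from the patch):

  #include <stdint.h>

  static inline uint32_t abal_u16_lane(uint32_t acc, uint16_t b, uint16_t c) {
    uint32_t d = b > c ? (uint32_t)(b - c) : (uint32_t)(c - b);
    return acc + d;  /* corresponds to: add <4 x i32> [[A]], [[VMOVL_I_I_I]] */
  }
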
-// CHECK-LABEL: @test_vabdl_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdl_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I_I_I]]
+//
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
return vabdl_high_s8(a, b);
}
-// CHECK-LABEL: @test_vabdl_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdl_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I_I_I]]
+//
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
return vabdl_high_s16(a, b);
}
-// CHECK-LABEL: @test_vabdl_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabdl_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I_I_I]]
+//
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
return vabdl_high_s32(a, b);
}
-// CHECK-LABEL: @test_vabdl_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabdl_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I_I_I]]
+//
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
return vabdl_high_u8(a, b);
}
-// CHECK-LABEL: @test_vabdl_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabdl_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I_I_I]]
+//
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
return vabdl_high_u16(a, b);
}
-// CHECK-LABEL: @test_vabdl_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabdl_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I_I_I]]
+//
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
return vabdl_high_u32(a, b);
}
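
The shufflevector with indices <2, 3> (or <4..7>, <8..15>) in the _high checks is the IR form of vget_high; the rest of the sequence is identical to the non-high variant. A hypothetical reference equivalence, assuming the usual arm_neon.h definitions:

  #include <arm_neon.h>

  /* vabdl_high_u32(a, b) computes the same lanes as: */
  uint64x2_t abdl_high_u32_ref(uint32x4_t a, uint32x4_t b) {
    return vabdl_u32(vget_high_u32(a), vget_high_u32(b));
  }
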
-// CHECK-LABEL: @test_vabal_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabal_high_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I_I]]
+//
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
return vabal_high_s8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabal_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I_I]]
+//
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
return vabal_high_s16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabal_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I_I]]
+//
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
return vabal_high_s32(a, b, c);
}
-// CHECK-LABEL: @test_vabal_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabal_high_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I_I]]
+//
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vabal_high_u8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabal_high_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I_I]]
+//
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vabal_high_u16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabal_high_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I_I]]
+//
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vabal_high_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmull_s8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
return vmull_s8(a, b);
}
-// CHECK-LABEL: @test_vmull_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmull_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
return vmull_s16(a, b);
}
-// CHECK-LABEL: @test_vmull_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmull_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
return vmull_s32(a, b);
}
-// CHECK-LABEL: @test_vmull_u8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
return vmull_u8(a, b);
}
-// CHECK-LABEL: @test_vmull_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmull_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
return vmull_u16(a, b);
}
-// CHECK-LABEL: @test_vmull_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmull_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
return vmull_u32(a, b);
}
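
The vmull checks collapse to a single smull/umull intrinsic call on the (bitcast-laundered) operands. Per lane this is a widen-then-multiply, so the product cannot wrap; a minimal sketch with a hypothetical helper name:

  #include <stdint.h>

  static inline int64_t mull_s32_lane(int32_t a, int32_t b) {
    return (int64_t)a * (int64_t)b;  /* full 64-bit product, no overflow */
  }
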
-// CHECK-LABEL: @test_vmull_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: ret <8 x i16> [[VMULL_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I_I]]
+//
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
return vmull_high_s8(a, b);
}
-// CHECK-LABEL: @test_vmull_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: ret <4 x i32> [[VMULL2_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmull_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
+//
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
return vmull_high_s16(a, b);
}
-// CHECK-LABEL: @test_vmull_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: ret <2 x i64> [[VMULL2_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmull_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
+//
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
return vmull_high_s32(a, b);
}
-// CHECK-LABEL: @test_vmull_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: ret <8 x i16> [[VMULL_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I_I]]
+//
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
return vmull_high_u8(a, b);
}
-// CHECK-LABEL: @test_vmull_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: ret <4 x i32> [[VMULL2_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmull_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
+//
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
return vmull_high_u16(a, b);
}
-// CHECK-LABEL: @test_vmull_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: ret <2 x i64> [[VMULL2_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmull_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
+//
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
return vmull_high_u32(a, b);
}
-// CHECK-LABEL: @test_vmlal_s8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlal_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlal_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlal_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlal_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlal_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlal_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_u32(a, b, c);
}
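
vmlal follows the same accumulate-on-top pattern as vabal: a vmull of b and c, then a plain IR add with a. A hypothetical one-lane model, assuming ACLE semantics:

  #include <stdint.h>

  static inline int64_t mlal_s32_lane(int64_t acc, int32_t b, int32_t c) {
    return acc + (int64_t)b * (int64_t)c;
  }
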
-// CHECK-LABEL: @test_vmlal_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlal_high_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I_I]]
+//
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
return vmlal_high_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlal_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I_I]]
+//
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
return vmlal_high_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlal_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I_I]]
+//
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
return vmlal_high_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlal_high_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I_I]]
+//
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vmlal_high_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlal_high_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I_I]]
+//
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vmlal_high_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlal_high_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[ADD_I_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I_I]]
+//
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vmlal_high_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsl_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlsl_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsl_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlsl_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsl_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlsl_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsl_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlsl_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_s8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsl_high_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I_I]]
+//
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
return vmlsl_high_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsl_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I_I]]
+//
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
return vmlsl_high_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlsl_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I_I]]
+//
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
return vmlsl_high_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_u8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmlsl_high_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> [[C]], <16 x i8> [[C]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I5_I]], <8 x i8> [[SHUFFLE_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I_I]]
+//
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vmlsl_high_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_u16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmlsl_high_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I_I]]
+//
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vmlsl_high_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_high_u32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmlsl_high_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]])
+// CHECK-NEXT: [[SUB_I_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I_I]]
+//
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vmlsl_high_u32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmull_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmull_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
return vqdmull_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmull_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmull_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
return vqdmull_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmlal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmlal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
+//
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmlal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
+//
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmlsl_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
+//
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmlsl_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
+//
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmull_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmull_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
return vqdmull_high_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmull_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmull_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
return vqdmull_high_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmlal_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
-// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmlal_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]]
+//
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
return vqdmlal_high_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlal_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
-// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmlal_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]]
+//
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
return vqdmlal_high_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_high_s16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
-// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqdmlsl_high_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> [[C]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]]
+//
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
return vqdmlsl_high_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_high_s32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
-// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
-// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqdmlsl_high_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> [[C]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I5_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]]
+//
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
return vqdmlsl_high_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmull_p8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
return vmull_p8(a, b);
}
-// CHECK-LABEL: @test_vmull_high_p8(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
-// CHECK: ret <8 x i16> [[VMULL_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmull_high_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I5:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I5]], <8 x i8> [[SHUFFLE_I]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I_I]]
+//
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
return vmull_high_p8(a, b);
}
-// CHECK-LABEL: @test_vaddd_s64(
-// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
-// CHECK: ret i64 [[VADDD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDD_I:%.*]] = add i64 [[A]], [[B]]
+// CHECK-NEXT: ret i64 [[VADDD_I]]
+//
int64_t test_vaddd_s64(int64_t a, int64_t b) {
return vaddd_s64(a, b);
}
-// CHECK-LABEL: @test_vaddd_u64(
-// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
-// CHECK: ret i64 [[VADDD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDD_I:%.*]] = add i64 [[A]], [[B]]
+// CHECK-NEXT: ret i64 [[VADDD_I]]
+//
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
return vaddd_u64(a, b);
}
-// CHECK-LABEL: @test_vsubd_s64(
-// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
-// CHECK: ret i64 [[VSUBD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vsubd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSUBD_I:%.*]] = sub i64 [[A]], [[B]]
+// CHECK-NEXT: ret i64 [[VSUBD_I]]
+//
int64_t test_vsubd_s64(int64_t a, int64_t b) {
return vsubd_s64(a, b);
}
-// CHECK-LABEL: @test_vsubd_u64(
-// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
-// CHECK: ret i64 [[VSUBD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vsubd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSUBD_I:%.*]] = sub i64 [[A]], [[B]]
+// CHECK-NEXT: ret i64 [[VSUBD_I]]
+//
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
return vsubd_u64(a, b);
}
-// CHECK-LABEL: @test_vqaddb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqaddb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
return vqaddb_s8(a, b);
}
-// CHECK-LABEL: @test_vqaddh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqaddh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
return vqaddh_s16(a, b);
}
-// CHECK-LABEL: @test_vqadds_s32(
-// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQADDS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqadds_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQADDS_S32_I]]
+//
int32_t test_vqadds_s32(int32_t a, int32_t b) {
return vqadds_s32(a, b);
}
-// CHECK-LABEL: @test_vqaddd_s64(
-// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQADDD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqaddd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQADDD_S64_I]]
+//
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
return vqaddd_s64(a, b);
}
-// CHECK-LABEL: @test_vqaddb_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqaddb_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
return vqaddb_u8(a, b);
}
-// CHECK-LABEL: @test_vqaddh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqaddh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
return vqaddh_u16(a, b);
}
-// CHECK-LABEL: @test_vqadds_u32(
-// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQADDS_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqadds_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQADDS_U32_I]]
+//
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
return vqadds_u32(a, b);
}
-// CHECK-LABEL: @test_vqaddd_u64(
-// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQADDD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqaddd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQADDD_U64_I]]
+//
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
return vqaddd_u64(a, b);
}
-// CHECK-LABEL: @test_vqsubb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqsubb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
return vqsubb_s8(a, b);
}
-// CHECK-LABEL: @test_vqsubh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqsubh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
return vqsubh_s16(a, b);
}
-// CHECK-LABEL: @test_vqsubs_s32(
-// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQSUBS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqsubs_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQSUBS_S32_I]]
+//
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
return vqsubs_s32(a, b);
}
-// CHECK-LABEL: @test_vqsubd_s64(
-// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQSUBD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqsubd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQSUBD_S64_I]]
+//
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
return vqsubd_s64(a, b);
}
-// CHECK-LABEL: @test_vqsubb_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqsubb_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
return vqsubb_u8(a, b);
}
-// CHECK-LABEL: @test_vqsubh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqsubh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
return vqsubh_u16(a, b);
}
-// CHECK-LABEL: @test_vqsubs_u32(
-// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQSUBS_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqsubs_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQSUBS_U32_I]]
+//
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
return vqsubs_u32(a, b);
}
-// CHECK-LABEL: @test_vqsubd_u64(
-// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQSUBD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqsubd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQSUBD_U64_I]]
+//
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
return vqsubd_u64(a, b);
}
-// CHECK-LABEL: @test_vshld_s64(
-// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VSHLD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vshld_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VSHLD_S64_I]]
+//
int64_t test_vshld_s64(int64_t a, int64_t b) {
return vshld_s64(a, b);
}
-// CHECK-LABEL: @test_vshld_u64(
-// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VSHLD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vshld_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VSHLD_U64_I]]
+//
uint64_t test_vshld_u64(uint64_t a, int64_t b) {
return vshld_u64(a, b);
}
-// CHECK-LABEL: @test_vqshlb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqshlb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
return vqshlb_s8(a, b);
}
-// CHECK-LABEL: @test_vqshlh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqshlh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
return vqshlh_s16(a, b);
}
-// CHECK-LABEL: @test_vqshls_s32(
-// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQSHLS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqshls_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQSHLS_S32_I]]
+//
int32_t test_vqshls_s32(int32_t a, int32_t b) {
return vqshls_s32(a, b);
}
-// CHECK-LABEL: @test_vqshld_s64(
-// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQSHLD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqshld_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQSHLD_S64_I]]
+//
int64_t test_vqshld_s64(int64_t a, int64_t b) {
return vqshld_s64(a, b);
}
-// CHECK-LABEL: @test_vqshlb_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqshlb_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
uint8_t test_vqshlb_u8(uint8_t a, int8_t b) {
return vqshlb_u8(a, b);
}
-// CHECK-LABEL: @test_vqshlh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqshlh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
uint16_t test_vqshlh_u16(uint16_t a, int16_t b) {
return vqshlh_u16(a, b);
}
-// CHECK-LABEL: @test_vqshls_u32(
-// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQSHLS_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqshls_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQSHLS_U32_I]]
+//
uint32_t test_vqshls_u32(uint32_t a, int32_t b) {
return vqshls_u32(a, b);
}
-// CHECK-LABEL: @test_vqshld_u64(
-// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQSHLD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqshld_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQSHLD_U64_I]]
+//
uint64_t test_vqshld_u64(uint64_t a, int64_t b) {
return vqshld_u64(a, b);
}
-// CHECK-LABEL: @test_vrshld_s64(
-// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VRSHLD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vrshld_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VRSHLD_S64_I]]
+//
int64_t test_vrshld_s64(int64_t a, int64_t b) {
return vrshld_s64(a, b);
}
-// CHECK-LABEL: @test_vrshld_u64(
-// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VRSHLD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vrshld_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VRSHLD_U64_I]]
+//
uint64_t test_vrshld_u64(uint64_t a, int64_t b) {
return vrshld_u64(a, b);
}
-// CHECK-LABEL: @test_vqrshlb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqrshlb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
return vqrshlb_s8(a, b);
}
-// CHECK-LABEL: @test_vqrshlh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqrshlh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
return vqrshlh_s16(a, b);
}
-// CHECK-LABEL: @test_vqrshls_s32(
-// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQRSHLS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqrshls_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQRSHLS_S32_I]]
+//
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
return vqrshls_s32(a, b);
}
-// CHECK-LABEL: @test_vqrshld_s64(
-// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQRSHLD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqrshld_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQRSHLD_S64_I]]
+//
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
return vqrshld_s64(a, b);
}
-// CHECK-LABEL: @test_vqrshlb_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vqrshlb_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) {
return vqrshlb_u8(a, b);
}
-// CHECK-LABEL: @test_vqrshlh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqrshlh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) {
return vqrshlh_u16(a, b);
}
-// CHECK-LABEL: @test_vqrshls_u32(
-// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQRSHLS_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqrshls_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQRSHLS_U32_I]]
+//
uint32_t test_vqrshls_u32(uint32_t a, int32_t b) {
return vqrshls_u32(a, b);
}
-// CHECK-LABEL: @test_vqrshld_u64(
-// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VQRSHLD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqrshld_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VQRSHLD_U64_I]]
+//
uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
return vqrshld_u64(a, b);
}
-// CHECK-LABEL: @test_vpaddd_s64(
-// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
-// CHECK: ret i64 [[VPADDD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vpaddd_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT: ret i64 [[VPADDD_S64_I]]
+//
int64_t test_vpaddd_s64(int64x2_t a) {
return vpaddd_s64(a);
}
-// CHECK-LABEL: @test_vpadds_f32(
-// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
-// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
-// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
-// CHECK: ret float [[VPADDD_I]]
+// CHECK-LABEL: define dso_local float @test_vpadds_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE0_I:%.*]] = extractelement <2 x float> [[A]], i64 0
+// CHECK-NEXT: [[LANE1_I:%.*]] = extractelement <2 x float> [[A]], i64 1
+// CHECK-NEXT: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
+// CHECK-NEXT: ret float [[VPADDD_I]]
+//
float32_t test_vpadds_f32(float32x2_t a) {
return vpadds_f32(a);
}
-// CHECK-LABEL: @test_vpaddd_f64(
-// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
-// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
-// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
-// CHECK: ret double [[VPADDD_I]]
+// CHECK-LABEL: define dso_local double @test_vpaddd_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE0_I:%.*]] = extractelement <2 x double> [[A]], i64 0
+// CHECK-NEXT: [[LANE1_I:%.*]] = extractelement <2 x double> [[A]], i64 1
+// CHECK-NEXT: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
+// CHECK-NEXT: ret double [[VPADDD_I]]
+//
float64_t test_vpaddd_f64(float64x2_t a) {
return vpaddd_f64(a);
}
-// CHECK-LABEL: @test_vpmaxnms_f32(
-// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VPMAXNMS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vpmaxnms_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VPMAXNMS_F32_I]]
+//
float32_t test_vpmaxnms_f32(float32x2_t a) {
return vpmaxnms_f32(a);
}
-// CHECK-LABEL: @test_vpmaxnmqd_f64(
-// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VPMAXNMQD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vpmaxnmqd_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VPMAXNMQD_F64_I]]
+//
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
return vpmaxnmqd_f64(a);
}
-// CHECK-LABEL: @test_vpmaxs_f32(
-// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VPMAXS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vpmaxs_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VPMAXS_F32_I]]
+//
float32_t test_vpmaxs_f32(float32x2_t a) {
return vpmaxs_f32(a);
}
-// CHECK-LABEL: @test_vpmaxqd_f64(
-// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VPMAXQD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vpmaxqd_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VPMAXQD_F64_I]]
+//
float64_t test_vpmaxqd_f64(float64x2_t a) {
return vpmaxqd_f64(a);
}
-// CHECK-LABEL: @test_vpminnms_f32(
-// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VPMINNMS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vpminnms_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VPMINNMS_F32_I]]
+//
float32_t test_vpminnms_f32(float32x2_t a) {
return vpminnms_f32(a);
}
-// CHECK-LABEL: @test_vpminnmqd_f64(
-// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VPMINNMQD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vpminnmqd_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VPMINNMQD_F64_I]]
+//
float64_t test_vpminnmqd_f64(float64x2_t a) {
return vpminnmqd_f64(a);
}
-// CHECK-LABEL: @test_vpmins_f32(
-// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VPMINS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vpmins_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VPMINS_F32_I]]
+//
float32_t test_vpmins_f32(float32x2_t a) {
return vpmins_f32(a);
}
-// CHECK-LABEL: @test_vpminqd_f64(
-// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VPMINQD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vpminqd_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VPMINQD_F64_I]]
+//
float64_t test_vpminqd_f64(float64x2_t a) {
return vpminqd_f64(a);
}
-// CHECK-LABEL: @test_vqdmulhh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqdmulhh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
return vqdmulhh_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulhs_s32(
-// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmulhs_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQDMULHS_S32_I]]
+//
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
return vqdmulhs_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vqrdmulhh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
return vqrdmulhh_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhs_s32(
-// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VQRDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqrdmulhs_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VQRDMULHS_S32_I]]
+//
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
return vqrdmulhs_s32(a, b);
}
-// CHECK-LABEL: @test_vmulxs_f32(
-// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
-// CHECK: ret float [[VMULXS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vmulxs_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret float [[VMULXS_F32_I]]
+//
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
return vmulxs_f32(a, b);
}
-// CHECK-LABEL: @test_vmulxd_f64(
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
-// CHECK: ret double [[VMULXD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vmulxd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret double [[VMULXD_F64_I]]
+//
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
return vmulxd_f64(a, b);
}
-// CHECK-LABEL: @test_vmulx_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VMULX2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]])
+// CHECK-NEXT: ret <1 x double> [[VMULX2_I]]
+//
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
return vmulx_f64(a, b);
}
-// CHECK-LABEL: @test_vrecpss_f32(
-// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
-// CHECK: ret float [[VRECPS_I]]
+// CHECK-LABEL: define dso_local float @test_vrecpss_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret float [[VRECPS_I]]
+//
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
return vrecpss_f32(a, b);
}
-// CHECK-LABEL: @test_vrecpsd_f64(
-// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
-// CHECK: ret double [[VRECPS_I]]
+// CHECK-LABEL: define dso_local double @test_vrecpsd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret double [[VRECPS_I]]
+//
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
return vrecpsd_f64(a, b);
}
-// CHECK-LABEL: @test_vrsqrtss_f32(
-// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
-// CHECK: ret float [[VRSQRTSS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vrsqrtss_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret float [[VRSQRTSS_F32_I]]
+//
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
return vrsqrtss_f32(a, b);
}
-// CHECK-LABEL: @test_vrsqrtsd_f64(
-// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
-// CHECK: ret double [[VRSQRTSD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vrsqrtsd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret double [[VRSQRTSD_F64_I]]
+//
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
return vrsqrtsd_f64(a, b);
}
-// CHECK-LABEL: @test_vcvts_f32_s32(
-// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
-// CHECK: ret float [[TMP0]]
+// CHECK-LABEL: define dso_local float @test_vcvts_f32_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = sitofp i32 [[A]] to float
+// CHECK-NEXT: ret float [[TMP0]]
+//
float32_t test_vcvts_f32_s32(int32_t a) {
return vcvts_f32_s32(a);
}
-// CHECK-LABEL: @test_vcvtd_f64_s64(
-// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
-// CHECK: ret double [[TMP0]]
+// CHECK-LABEL: define dso_local double @test_vcvtd_f64_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = sitofp i64 [[A]] to double
+// CHECK-NEXT: ret double [[TMP0]]
+//
float64_t test_vcvtd_f64_s64(int64_t a) {
return vcvtd_f64_s64(a);
}
-// CHECK-LABEL: @test_vcvts_f32_u32(
-// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
-// CHECK: ret float [[TMP0]]
+// CHECK-LABEL: define dso_local float @test_vcvts_f32_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = uitofp i32 [[A]] to float
+// CHECK-NEXT: ret float [[TMP0]]
+//
float32_t test_vcvts_f32_u32(uint32_t a) {
return vcvts_f32_u32(a);
}
-// CHECK-LABEL: @test_vcvtd_f64_u64(
-// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
-// CHECK: ret double [[TMP0]]
+// CHECK-LABEL: define dso_local double @test_vcvtd_f64_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = uitofp i64 [[A]] to double
+// CHECK-NEXT: ret double [[TMP0]]
+//
float64_t test_vcvtd_f64_u64(uint64_t a) {
return vcvtd_f64_u64(a);
}
-// CHECK-LABEL: @test_vrecpes_f32(
-// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
-// CHECK: ret float [[VRECPES_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vrecpes_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float [[A]])
+// CHECK-NEXT: ret float [[VRECPES_F32_I]]
+//
float32_t test_vrecpes_f32(float32_t a) {
return vrecpes_f32(a);
}
-// CHECK-LABEL: @test_vrecped_f64(
-// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
-// CHECK: ret double [[VRECPED_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vrecped_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double [[A]])
+// CHECK-NEXT: ret double [[VRECPED_F64_I]]
+//
float64_t test_vrecped_f64(float64_t a) {
return vrecped_f64(a);
}
-// CHECK-LABEL: @test_vrecpxs_f32(
-// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
-// CHECK: ret float [[VRECPXS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vrecpxs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float [[A]])
+// CHECK-NEXT: ret float [[VRECPXS_F32_I]]
+//
float32_t test_vrecpxs_f32(float32_t a) {
return vrecpxs_f32(a);
}
-// CHECK-LABEL: @test_vrecpxd_f64(
-// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
-// CHECK: ret double [[VRECPXD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vrecpxd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double [[A]])
+// CHECK-NEXT: ret double [[VRECPXD_F64_I]]
+//
float64_t test_vrecpxd_f64(float64_t a) {
return vrecpxd_f64(a);
}
-// CHECK-LABEL: @test_vrsqrte_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrsqrte_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]])
+// CHECK-NEXT: ret <2 x i32> [[VRSQRTE_V1_I]]
+//
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
return vrsqrte_u32(a);
}
-// CHECK-LABEL: @test_vrsqrteq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrsqrteq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]])
+// CHECK-NEXT: ret <4 x i32> [[VRSQRTEQ_V1_I]]
+//
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
return vrsqrteq_u32(a);
}
-// CHECK-LABEL: @test_vrsqrtes_f32(
-// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
-// CHECK: ret float [[VRSQRTES_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vrsqrtes_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float [[A]])
+// CHECK-NEXT: ret float [[VRSQRTES_F32_I]]
+//
float32_t test_vrsqrtes_f32(float32_t a) {
return vrsqrtes_f32(a);
}
-// CHECK-LABEL: @test_vrsqrted_f64(
-// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
-// CHECK: ret double [[VRSQRTED_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vrsqrted_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double [[A]])
+// CHECK-NEXT: ret double [[VRSQRTED_F64_I]]
+//
float64_t test_vrsqrted_f64(float64_t a) {
return vrsqrted_f64(a);
}
-// CHECK-LABEL: @test_vld1q_u8(
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
-// CHECK: ret <16 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vld1q_u8(uint8_t const *a) {
return vld1q_u8(a);
}
-// CHECK-LABEL: @test_vld1q_u16(
-// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vld1q_u16(uint16_t const *a) {
return vld1q_u16(a);
}
-// CHECK-LABEL: @test_vld1q_u32(
-// CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vld1q_u32(uint32_t const *a) {
return vld1q_u32(a);
}
-// CHECK-LABEL: @test_vld1q_u64(
-// CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[A]], align 8
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vld1q_u64(uint64_t const *a) {
return vld1q_u64(a);
}
-// CHECK-LABEL: @test_vld1q_s8(
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
-// CHECK: ret <16 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vld1q_s8(int8_t const *a) {
return vld1q_s8(a);
}
-// CHECK-LABEL: @test_vld1q_s16(
-// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vld1q_s16(int16_t const *a) {
return vld1q_s16(a);
}
-// CHECK-LABEL: @test_vld1q_s32(
-// CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vld1q_s32(int32_t const *a) {
return vld1q_s32(a);
}
-// CHECK-LABEL: @test_vld1q_s64(
-// CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[A]], align 8
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vld1q_s64(int64_t const *a) {
return vld1q_s64(a);
}
-// CHECK-LABEL: @test_vld1q_f16(
-// CHECK: [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2
-// CHECK: ret <8 x half> [[TMP2]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vld1q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 2
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vld1q_f16(float16_t const *a) {
return vld1q_f16(a);
}
-// CHECK-LABEL: @test_vld1q_f32(
-// CHECK: [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4
-// CHECK: ret <4 x float> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vld1q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vld1q_f32(float32_t const *a) {
return vld1q_f32(a);
}
-// CHECK-LABEL: @test_vld1q_f64(
-// CHECK: [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8
-// CHECK: ret <2 x double> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vld1q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
float64x2_t test_vld1q_f64(float64_t const *a) {
return vld1q_f64(a);
}
-// CHECK-LABEL: @test_vld1q_p8(
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
-// CHECK: ret <16 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vld1q_p8(poly8_t const *a) {
return vld1q_p8(a);
}
-// CHECK-LABEL: @test_vld1q_p16(
-// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
-// CHECK: ret <8 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vld1q_p16(poly16_t const *a) {
return vld1q_p16(a);
}
-// CHECK-LABEL: @test_vld1_u8(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vld1_u8(uint8_t const *a) {
return vld1_u8(a);
}
-// CHECK-LABEL: @test_vld1_u16(
-// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vld1_u16(uint16_t const *a) {
return vld1_u16(a);
}
-// CHECK-LABEL: @test_vld1_u32(
-// CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 4
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vld1_u32(uint32_t const *a) {
return vld1_u32(a);
}
-// CHECK-LABEL: @test_vld1_u64(
-// CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[A]], align 8
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
uint64x1_t test_vld1_u64(uint64_t const *a) {
return vld1_u64(a);
}
-// CHECK-LABEL: @test_vld1_s8(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vld1_s8(int8_t const *a) {
return vld1_s8(a);
}
-// CHECK-LABEL: @test_vld1_s16(
-// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vld1_s16(int16_t const *a) {
return vld1_s16(a);
}
-// CHECK-LABEL: @test_vld1_s32(
-// CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 4
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vld1_s32(int32_t const *a) {
return vld1_s32(a);
}
-// CHECK-LABEL: @test_vld1_s64(
-// CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[A]], align 8
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
int64x1_t test_vld1_s64(int64_t const *a) {
return vld1_s64(a);
}
-// CHECK-LABEL: @test_vld1_f16(
-// CHECK: [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2
-// CHECK: ret <4 x half> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vld1_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[A]], align 2
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vld1_f16(float16_t const *a) {
return vld1_f16(a);
}
-// CHECK-LABEL: @test_vld1_f32(
-// CHECK: [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4
-// CHECK: ret <2 x float> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vld1_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vld1_f32(float32_t const *a) {
return vld1_f32(a);
}
-// CHECK-LABEL: @test_vld1_f64(
-// CHECK: [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8
-// CHECK: ret <1 x double> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vld1_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[A]], align 8
+// CHECK-NEXT: ret <1 x double> [[TMP0]]
+//
float64x1_t test_vld1_f64(float64_t const *a) {
return vld1_f64(a);
}
-// CHECK-LABEL: @test_vld1_p8(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vld1_p8(poly8_t const *a) {
return vld1_p8(a);
}
-// CHECK-LABEL: @test_vld1_p16(
-// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
-// CHECK: ret <4 x i16> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 2
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vld1_p16(poly16_t const *a) {
return vld1_p16(a);
}
-// CHECK-LABEL: @test_vld1_u8_void(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_u8_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vld1_u8_void(void *a) {
return vld1_u8(a);
}
-// CHECK-LABEL: @test_vld1_u16_void(
-// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
-// CHECK: ret <4 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_u16_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 1
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vld1_u16_void(void *a) {
return vld1_u16(a);
}
-// CHECK-LABEL: @test_vld1_u32_void(
-// CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
-// CHECK: ret <2 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_u32_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 1
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vld1_u32_void(void *a) {
return vld1_u32(a);
}
-// CHECK-LABEL: @test_vld1_u64_void(
-// CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_u64_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[A]], align 1
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
uint64x1_t test_vld1_u64_void(void *a) {
return vld1_u64(a);
}
-// CHECK-LABEL: @test_vld1_s8_void(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_s8_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vld1_s8_void(void *a) {
return vld1_s8(a);
}
-// CHECK-LABEL: @test_vld1_s16_void(
-// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
-// CHECK: ret <4 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_s16_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 1
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vld1_s16_void(void *a) {
return vld1_s16(a);
}
-// CHECK-LABEL: @test_vld1_s32_void(
-// CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
-// CHECK: ret <2 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_s32_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 1
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vld1_s32_void(void *a) {
return vld1_s32(a);
}
-// CHECK-LABEL: @test_vld1_s64_void(
-// CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_s64_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[A]], align 1
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
int64x1_t test_vld1_s64_void(void *a) {
return vld1_s64(a);
}
-// CHECK-LABEL: @test_vld1_f16_void(
-// CHECK: [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1
-// CHECK: ret <4 x half> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vld1_f16_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[A]], align 1
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vld1_f16_void(void *a) {
return vld1_f16(a);
}
-// CHECK-LABEL: @test_vld1_f32_void(
-// CHECK: [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1
-// CHECK: ret <2 x float> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vld1_f32_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 1
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vld1_f32_void(void *a) {
return vld1_f32(a);
}
-// CHECK-LABEL: @test_vld1_f64_void(
-// CHECK: [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1
-// CHECK: ret <1 x double> [[TMP1]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vld1_f64_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[A]], align 1
+// CHECK-NEXT: ret <1 x double> [[TMP0]]
+//
float64x1_t test_vld1_f64_void(void *a) {
return vld1_f64(a);
}
-// CHECK-LABEL: @test_vld1_p8_void(
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
-// CHECK: ret <8 x i8> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_p8_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vld1_p8_void(void *a) {
return vld1_p8(a);
}
-// CHECK-LABEL: @test_vld1_p16_void(
-// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
-// CHECK: ret <4 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_p16_void(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 1
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vld1_p16_void(void *a) {
return vld1_p16(a);
}
-// CHECK-LABEL: @test_vld2q_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vld2q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
return vld2q_u8(a);
}
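// Every vld2q check below follows one shape: the ld2 intrinsic returns a
// two-element literal struct whose fields are moved into the NEON wrapper
// struct with extractvalue/insertvalue, replacing the old
// alloca + store + memcpy + load round trip. An illustrative sketch of the
// shared pattern (placeholders, not literal check lines):
//
//   %vld2 = call { <N x T>, <N x T> } @llvm.aarch64.neon.ld2.vNT.p0(ptr %a)
//   %e0   = extractvalue { <N x T>, <N x T> } %vld2, 0
//   %e1   = extractvalue { <N x T>, <N x T> } %vld2, 1
//   %r0   = insertvalue %struct.<Ty>x2_t poison, <N x T> %e0, 0, 0
//   %r1   = insertvalue %struct.<Ty>x2_t %r0,    <N x T> %e1, 0, 1
//   ret %struct.<Ty>x2_t %r1
//
// The `_FCA_` value names suggest the first-class-aggregate rewrite that
// SROA performs on the old in-memory sequence.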
-// CHECK-LABEL: @test_vld2q_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vld2q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
return vld2q_u16(a);
}
-// CHECK-LABEL: @test_vld2q_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vld2q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
return vld2q_u32(a);
}
-// CHECK-LABEL: @test_vld2q_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x2_t @test_vld2q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
return vld2q_u64(a);
}
-// CHECK-LABEL: @test_vld2q_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vld2q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x16x2_t test_vld2q_s8(int8_t const *a) {
return vld2q_s8(a);
}
-// CHECK-LABEL: @test_vld2q_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vld2q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x8x2_t test_vld2q_s16(int16_t const *a) {
return vld2q_s16(a);
}
-// CHECK-LABEL: @test_vld2q_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vld2q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x4x2_t test_vld2q_s32(int32_t const *a) {
return vld2q_s32(a);
}
-// CHECK-LABEL: @test_vld2q_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x2_t @test_vld2q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int64x2x2_t test_vld2q_s64(int64_t const *a) {
return vld2q_s64(a);
}
-// CHECK-LABEL: @test_vld2q_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a)
-// CHECK: store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x8x2_t @test_vld2q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] poison, <8 x half> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float16x8x2_t test_vld2q_f16(float16_t const *a) {
return vld2q_f16(a);
}
-// CHECK-LABEL: @test_vld2q_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a)
-// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vld2q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x4x2_t test_vld2q_f32(float32_t const *a) {
return vld2q_f32(a);
}
-// CHECK-LABEL: @test_vld2q_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x2_t @test_vld2q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T:%.*]] poison, <2 x double> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x2x2_t test_vld2q_f64(float64_t const *a) {
return vld2q_f64(a);
}
-// CHECK-LABEL: @test_vld2q_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vld2q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
return vld2q_p8(a);
}
-// CHECK-LABEL: @test_vld2q_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vld2q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
return vld2q_p16(a);
}
-// CHECK-LABEL: @test_vld2_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vld2_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
return vld2_u8(a);
}
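// The 64-bit (d-register) vld2 variants repeat the same pattern with
// half-width vectors; only the old checks differ, using align 8 and a
// 16-byte memcpy for the smaller two-vector structs.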
-// CHECK-LABEL: @test_vld2_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vld2_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
return vld2_u16(a);
}
-// CHECK-LABEL: @test_vld2_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vld2_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
return vld2_u32(a);
}
-// CHECK-LABEL: @test_vld2_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x2_t @test_vld2_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
return vld2_u64(a);
}
-// CHECK-LABEL: @test_vld2_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vld2_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x8x2_t test_vld2_s8(int8_t const *a) {
return vld2_s8(a);
}
-// CHECK-LABEL: @test_vld2_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vld2_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x4x2_t test_vld2_s16(int16_t const *a) {
return vld2_s16(a);
}
-// CHECK-LABEL: @test_vld2_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vld2_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x2x2_t test_vld2_s32(int32_t const *a) {
return vld2_s32(a);
}
-// CHECK-LABEL: @test_vld2_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x1x2_t @test_vld2_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int64x1x2_t test_vld2_s64(int64_t const *a) {
return vld2_s64(a);
}
-// CHECK-LABEL: @test_vld2_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a)
-// CHECK: store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x4x2_t @test_vld2_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T:%.*]] poison, <4 x half> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float16x4x2_t test_vld2_f16(float16_t const *a) {
return vld2_f16(a);
}
-// CHECK-LABEL: @test_vld2_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a)
-// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vld2_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x2x2_t test_vld2_f32(float32_t const *a) {
return vld2_f32(a);
}
-// CHECK-LABEL: @test_vld2_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x2_t @test_vld2_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x1x2_t test_vld2_f64(float64_t const *a) {
return vld2_f64(a);
}
-// CHECK-LABEL: @test_vld2_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x2_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vld2_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
return vld2_p8(a);
}
-// CHECK-LABEL: @test_vld2_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vld2_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
return vld2_p16(a);
}
-// CHECK-LABEL: @test_vld3q_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x3_t @test_vld3q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
return vld3q_u8(a);
}
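// vld3q extends the same rewrite to three-element aggregates: three
// extractvalues from the ld3 result and insertvalues at indices
// 0,0 / 0,1 / 0,2, in place of the 48-byte memcpy round trip in the old
// checks.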
-// CHECK-LABEL: @test_vld3q_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x3_t @test_vld3q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
return vld3q_u16(a);
}
-// CHECK-LABEL: @test_vld3q_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x3_t @test_vld3q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T:%.*]] poison, <4 x i32> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
return vld3q_u32(a);
}
-// CHECK-LABEL: @test_vld3q_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x3_t @test_vld3q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
return vld3q_u64(a);
}
-// CHECK-LABEL: @test_vld3q_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x16x3_t @test_vld3q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int8x16x3_t test_vld3q_s8(int8_t const *a) {
return vld3q_s8(a);
}
-// CHECK-LABEL: @test_vld3q_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x8x3_t @test_vld3q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int16x8x3_t test_vld3q_s16(int16_t const *a) {
return vld3q_s16(a);
}
-// CHECK-LABEL: @test_vld3q_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x4x3_t @test_vld3q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T:%.*]] poison, <4 x i32> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int32x4x3_t test_vld3q_s32(int32_t const *a) {
return vld3q_s32(a);
}
-// CHECK-LABEL: @test_vld3q_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x3_t @test_vld3q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int64x2x3_t test_vld3q_s64(int64_t const *a) {
return vld3q_s64(a);
}
-// CHECK-LABEL: @test_vld3q_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a)
-// CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x8x3_t @test_vld3q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T:%.*]] poison, <8 x half> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x half> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float16x8x3_t test_vld3q_f16(float16_t const *a) {
return vld3q_f16(a);
}
-// CHECK-LABEL: @test_vld3q_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a)
-// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x4x3_t @test_vld3q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T:%.*]] poison, <4 x float> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x float> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float32x4x3_t test_vld3q_f32(float32_t const *a) {
return vld3q_f32(a);
}
-// CHECK-LABEL: @test_vld3q_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x3_t @test_vld3q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T:%.*]] poison, <2 x double> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x2x3_t test_vld3q_f64(float64_t const *a) {
return vld3q_f64(a);
}
-// CHECK-LABEL: @test_vld3q_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x3_t @test_vld3q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
return vld3q_p8(a);
}
-// CHECK-LABEL: @test_vld3q_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x3_t @test_vld3q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
return vld3q_p16(a);
}
-// CHECK-LABEL: @test_vld3_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x3_t @test_vld3_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
return vld3_u8(a);
}
-// CHECK-LABEL: @test_vld3_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x3_t @test_vld3_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
return vld3_u16(a);
}
-// CHECK-LABEL: @test_vld3_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x3_t @test_vld3_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T:%.*]] poison, <2 x i32> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
return vld3_u32(a);
}
-// CHECK-LABEL: @test_vld3_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x3_t @test_vld3_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
return vld3_u64(a);
}
-// CHECK-LABEL: @test_vld3_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x8x3_t @test_vld3_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int8x8x3_t test_vld3_s8(int8_t const *a) {
return vld3_s8(a);
}
-// CHECK-LABEL: @test_vld3_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x4x3_t @test_vld3_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int16x4x3_t test_vld3_s16(int16_t const *a) {
return vld3_s16(a);
}
-// CHECK-LABEL: @test_vld3_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x2x3_t @test_vld3_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T:%.*]] poison, <2 x i32> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int32x2x3_t test_vld3_s32(int32_t const *a) {
return vld3_s32(a);
}
-// CHECK-LABEL: @test_vld3_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x1x3_t @test_vld3_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int64x1x3_t test_vld3_s64(int64_t const *a) {
return vld3_s64(a);
}
-// CHECK-LABEL: @test_vld3_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a)
-// CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x4x3_t @test_vld3_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T:%.*]] poison, <4 x half> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x half> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float16x4x3_t test_vld3_f16(float16_t const *a) {
return vld3_f16(a);
}
-// CHECK-LABEL: @test_vld3_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a)
-// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x2x3_t @test_vld3_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T:%.*]] poison, <2 x float> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x float> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float32x2x3_t test_vld3_f32(float32_t const *a) {
return vld3_f32(a);
}
-// CHECK-LABEL: @test_vld3_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x3_t @test_vld3_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x1x3_t test_vld3_f64(float64_t const *a) {
return vld3_f64(a);
}
-// CHECK-LABEL: @test_vld3_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x3_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x3_t @test_vld3_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
return vld3_p8(a);
}
-// CHECK-LABEL: @test_vld3_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x3_t @test_vld3_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
return vld3_p16(a);
}
-// CHECK-LABEL: @test_vld4q_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x4_t @test_vld4q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
return vld4q_u8(a);
}
-// CHECK-LABEL: @test_vld4q_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x4_t @test_vld4q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
return vld4q_u16(a);
}
-// CHECK-LABEL: @test_vld4q_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x4_t @test_vld4q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T:%.*]] poison, <4 x i32> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i32> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
return vld4q_u32(a);
}
-// CHECK-LABEL: @test_vld4q_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x4_t @test_vld4q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
return vld4q_u64(a);
}
-// CHECK-LABEL: @test_vld4q_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x16x4_t @test_vld4q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int8x16x4_t test_vld4q_s8(int8_t const *a) {
return vld4q_s8(a);
}
-// CHECK-LABEL: @test_vld4q_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x8x4_t @test_vld4q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int16x8x4_t test_vld4q_s16(int16_t const *a) {
return vld4q_s16(a);
}
-// CHECK-LABEL: @test_vld4q_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x4x4_t @test_vld4q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T:%.*]] poison, <4 x i32> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i32> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int32x4x4_t test_vld4q_s32(int32_t const *a) {
return vld4q_s32(a);
}
-// CHECK-LABEL: @test_vld4q_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x4_t @test_vld4q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int64x2x4_t test_vld4q_s64(int64_t const *a) {
return vld4q_s64(a);
}
-// CHECK-LABEL: @test_vld4q_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a)
-// CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x8x4_t @test_vld4q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T:%.*]] poison, <8 x half> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x half> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x half> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float16x8x4_t test_vld4q_f16(float16_t const *a) {
return vld4q_f16(a);
}
-// CHECK-LABEL: @test_vld4q_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a)
-// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x4x4_t @test_vld4q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T:%.*]] poison, <4 x float> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x float> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x float> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float32x4x4_t test_vld4q_f32(float32_t const *a) {
return vld4q_f32(a);
}
-// CHECK-LABEL: @test_vld4q_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x4_t @test_vld4q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T:%.*]] poison, <2 x double> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x double> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x2x4_t test_vld4q_f64(float64_t const *a) {
return vld4q_f64(a);
}
-// CHECK-LABEL: @test_vld4q_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x4_t @test_vld4q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
return vld4q_p8(a);
}
-// CHECK-LABEL: @test_vld4q_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x4_t @test_vld4q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
return vld4q_p16(a);
}
-// CHECK-LABEL: @test_vld4_u8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x4_t @test_vld4_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
return vld4_u8(a);
}
-// CHECK-LABEL: @test_vld4_u16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x4_t @test_vld4_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
return vld4_u16(a);
}
-// CHECK-LABEL: @test_vld4_u32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x4_t @test_vld4_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T:%.*]] poison, <2 x i32> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i32> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
return vld4_u32(a);
}
-// CHECK-LABEL: @test_vld4_u64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x4_t @test_vld4_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
return vld4_u64(a);
}
-// CHECK-LABEL: @test_vld4_s8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.int8x8x4_t @test_vld4_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int8x8x4_t test_vld4_s8(int8_t const *a) {
return vld4_s8(a);
}
-// CHECK-LABEL: @test_vld4_s16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int16x4x4_t @test_vld4_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int16x4x4_t test_vld4_s16(int16_t const *a) {
return vld4_s16(a);
}
-// CHECK-LABEL: @test_vld4_s32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int32x2x4_t @test_vld4_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T:%.*]] poison, <2 x i32> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i32> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int32x2x4_t test_vld4_s32(int32_t const *a) {
return vld4_s32(a);
}
-// CHECK-LABEL: @test_vld4_s64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x1x4_t @test_vld4_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int64x1x4_t test_vld4_s64(int64_t const *a) {
return vld4_s64(a);
}
-// CHECK-LABEL: @test_vld4_f16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a)
-// CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float16x4x4_t @test_vld4_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T:%.*]] poison, <4 x half> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x half> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x half> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float16x4x4_t test_vld4_f16(float16_t const *a) {
return vld4_f16(a);
}
-// CHECK-LABEL: @test_vld4_f32(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a)
-// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float32x2x4_t @test_vld4_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T:%.*]] poison, <2 x float> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x float> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x float> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float32x2x4_t test_vld4_f32(float32_t const *a) {
return vld4_f32(a);
}
-// CHECK-LABEL: @test_vld4_f64(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x4_t @test_vld4_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x double> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x1x4_t test_vld4_f64(float64_t const *a) {
return vld4_f64(a);
}
-// CHECK-LABEL: @test_vld4_p8(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x4_t @test_vld4_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
return vld4_p8(a);
}
-// CHECK-LABEL: @test_vld4_p16(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x4_t @test_vld4_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
return vld4_p16(a);
}
-// CHECK-LABEL: @test_vst1q_u8(
-// CHECK: store <16 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <16 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
vst1q_u8(a, b);
}
-// CHECK-LABEL: @test_vst1q_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: store <8 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
vst1q_u16(a, b);
}
-// CHECK-LABEL: @test_vst1q_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: store <4 x i32> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
vst1q_u32(a, b);
}
-// CHECK-LABEL: @test_vst1q_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: store <2 x i64> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
vst1q_u64(a, b);
}
-// CHECK-LABEL: @test_vst1q_s8(
-// CHECK: store <16 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <16 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_s8(int8_t *a, int8x16_t b) {
vst1q_s8(a, b);
}
-// CHECK-LABEL: @test_vst1q_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: store <8 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_s16(int16_t *a, int16x8_t b) {
vst1q_s16(a, b);
}
-// CHECK-LABEL: @test_vst1q_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: store <4 x i32> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_s32(int32_t *a, int32x4_t b) {
vst1q_s32(a, b);
}
-// CHECK-LABEL: @test_vst1q_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: store <2 x i64> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_s64(int64_t *a, int64x2_t b) {
vst1q_s64(a, b);
}
-// CHECK-LABEL: @test_vst1q_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: store <8 x half> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: store <8 x half> [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_f16(float16_t *a, float16x8_t b) {
vst1q_f16(a, b);
}
-// CHECK-LABEL: @test_vst1q_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: store <4 x float> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_f32(float32_t *a, float32x4_t b) {
vst1q_f32(a, b);
}
-// CHECK-LABEL: @test_vst1q_f64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: store <2 x double> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_f64(float64_t *a, float64x2_t b) {
vst1q_f64(a, b);
}
-// CHECK-LABEL: @test_vst1q_p8(
-// CHECK: store <16 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <16 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
vst1q_p8(a, b);
}
-// CHECK-LABEL: @test_vst1q_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: store <8 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
vst1q_p16(a, b);
}
-// CHECK-LABEL: @test_vst1_u8(
-// CHECK: store <8 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <8 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
vst1_u8(a, b);
}
-// CHECK-LABEL: @test_vst1_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: store <4 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
vst1_u16(a, b);
}
-// CHECK-LABEL: @test_vst1_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: store <2 x i32> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
vst1_u32(a, b);
}
-// CHECK-LABEL: @test_vst1_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: store <1 x i64> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: store <1 x i64> [[TMP1]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
vst1_u64(a, b);
}
-// CHECK-LABEL: @test_vst1_s8(
-// CHECK: store <8 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <8 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_s8(int8_t *a, int8x8_t b) {
vst1_s8(a, b);
}
-// CHECK-LABEL: @test_vst1_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: store <4 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_s16(int16_t *a, int16x4_t b) {
vst1_s16(a, b);
}
-// CHECK-LABEL: @test_vst1_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: store <2 x i32> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_s32(int32_t *a, int32x2_t b) {
vst1_s32(a, b);
}
-// CHECK-LABEL: @test_vst1_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: store <1 x i64> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: store <1 x i64> [[TMP1]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_s64(int64_t *a, int64x1_t b) {
vst1_s64(a, b);
}
-// CHECK-LABEL: @test_vst1_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: store <4 x half> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: store <4 x half> [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_f16(float16_t *a, float16x4_t b) {
vst1_f16(a, b);
}
-// CHECK-LABEL: @test_vst1_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: store <2 x float> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_f32(float32_t *a, float32x2_t b) {
vst1_f32(a, b);
}
-// CHECK-LABEL: @test_vst1_f64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: store <1 x double> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: store <1 x double> [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_f64(float64_t *a, float64x1_t b) {
vst1_f64(a, b);
}
-// CHECK-LABEL: @test_vst1_p8(
-// CHECK: store <8 x i8> %b, ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: store <8 x i8> [[B]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
vst1_p8(a, b);
}
-// CHECK-LABEL: @test_vst1_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: store <4 x i16> [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
vst1_p16(a, b);
}
-// CHECK-LABEL: @test_vst2q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
vst2q_u8(a, b);
}
-// CHECK-LABEL: @test_vst2q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
vst2q_u16(a, b);
}
-// CHECK-LABEL: @test_vst2q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
vst2q_u32(a, b);
}
-// CHECK-LABEL: @test_vst2q_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
vst2q_u64(a, b);
}
-// CHECK-LABEL: @test_vst2q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
vst2q_s8(a, b);
}
-// CHECK-LABEL: @test_vst2q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
vst2q_s16(a, b);
}
-// CHECK-LABEL: @test_vst2q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
vst2q_s32(a, b);
}
-// CHECK-LABEL: @test_vst2q_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
vst2q_s64(a, b);
}
-// CHECK-LABEL: @test_vst2q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP4]], <8 x half> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
vst2q_f16(a, b);
}
-// CHECK-LABEL: @test_vst2q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP4]], <4 x float> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
vst2q_f32(a, b);
}
-// CHECK-LABEL: @test_vst2q_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP4]], <2 x double> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
vst2q_f64(a, b);
}
-// CHECK-LABEL: @test_vst2q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
vst2q_p8(a, b);
}
-// CHECK-LABEL: @test_vst2q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
vst2q_p16(a, b);
}
-// CHECK-LABEL: @test_vst2_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
vst2_u8(a, b);
}
-// CHECK-LABEL: @test_vst2_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
vst2_u16(a, b);
}
-// CHECK-LABEL: @test_vst2_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
vst2_u32(a, b);
}
-// CHECK-LABEL: @test_vst2_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
vst2_u64(a, b);
}
-// CHECK-LABEL: @test_vst2_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
vst2_s8(a, b);
}
-// CHECK-LABEL: @test_vst2_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
vst2_s16(a, b);
}
-// CHECK-LABEL: @test_vst2_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
vst2_s32(a, b);
}
-// CHECK-LABEL: @test_vst2_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
vst2_s64(a, b);
}
-// CHECK-LABEL: @test_vst2_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP4]], <4 x half> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
vst2_f16(a, b);
}
-// CHECK-LABEL: @test_vst2_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP4]], <2 x float> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
vst2_f32(a, b);
}
-// CHECK-LABEL: @test_vst2_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP4]], <1 x double> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
vst2_f64(a, b);
}
-// CHECK-LABEL: @test_vst2_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
vst2_p8(a, b);
}
-// CHECK-LABEL: @test_vst2_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
vst2_p16(a, b);
}
-// CHECK-LABEL: @test_vst3q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
vst3q_u8(a, b);
}
-// CHECK-LABEL: @test_vst3q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
vst3q_u16(a, b);
}
-// CHECK-LABEL: @test_vst3q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
vst3q_u32(a, b);
}
-// CHECK-LABEL: @test_vst3q_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
vst3q_u64(a, b);
}
-// CHECK-LABEL: @test_vst3q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
vst3q_s8(a, b);
}
-// CHECK-LABEL: @test_vst3q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
vst3q_s16(a, b);
}
-// CHECK-LABEL: @test_vst3q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
vst3q_s32(a, b);
}
-// CHECK-LABEL: @test_vst3q_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
vst3q_s64(a, b);
}
-// CHECK-LABEL: @test_vst3q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP6]], <8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
vst3q_f16(a, b);
}
-// CHECK-LABEL: @test_vst3q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP6]], <4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
vst3q_f32(a, b);
}
-// CHECK-LABEL: @test_vst3q_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
vst3q_f64(a, b);
}
-// CHECK-LABEL: @test_vst3q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
vst3q_p8(a, b);
}
-// CHECK-LABEL: @test_vst3q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
vst3q_p16(a, b);
}
-// CHECK-LABEL: @test_vst3_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
vst3_u8(a, b);
}
-// CHECK-LABEL: @test_vst3_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
vst3_u16(a, b);
}
-// CHECK-LABEL: @test_vst3_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
vst3_u32(a, b);
}
-// CHECK-LABEL: @test_vst3_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
vst3_u64(a, b);
}
-// CHECK-LABEL: @test_vst3_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
vst3_s8(a, b);
}
-// CHECK-LABEL: @test_vst3_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
vst3_s16(a, b);
}
-// CHECK-LABEL: @test_vst3_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
vst3_s32(a, b);
}
-// CHECK-LABEL: @test_vst3_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
vst3_s64(a, b);
}
-// CHECK-LABEL: @test_vst3_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
vst3_f16(a, b);
}
-// CHECK-LABEL: @test_vst3_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
vst3_f32(a, b);
}
-// CHECK-LABEL: @test_vst3_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP6]], <1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
vst3_f64(a, b);
}
-// CHECK-LABEL: @test_vst3_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
vst3_p8(a, b);
}
-// CHECK-LABEL: @test_vst3_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
vst3_p16(a, b);
}
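The updated checks above show the new lowering for the array-coerced argument: each <4 x i16> is pulled out of the [3 x <4 x i16>] aggregate with extractvalue and round-tripped through <8 x i8> bitcasts before the llvm.aarch64.neon.st3 call, with no alloca/memcpy staging left. As an illustrative sketch only (not part of this patch; the function name is hypothetical), a C caller that exercises this intrinsic on an AArch64 target could look like:

#include <arm_neon.h>

// Hypothetical example: build the x3 struct and store interleaved.
void store3_p16(poly16_t *dst, poly16x4_t a, poly16x4_t b, poly16x4_t c) {
  poly16x4x3_t v = { { a, b, c } };  // three 4-lane vectors to interleave
  vst3_p16(dst, v);                  // stores a0,b0,c0,a1,b1,c1,...
}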
-// CHECK-LABEL: @test_vst4q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
vst4q_u8(a, b);
}
-// CHECK-LABEL: @test_vst4q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
vst4q_u16(a, b);
}
-// CHECK-LABEL: @test_vst4q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
vst4q_u32(a, b);
}
-// CHECK-LABEL: @test_vst4q_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
vst4q_u64(a, b);
}
-// CHECK-LABEL: @test_vst4q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
vst4q_s8(a, b);
}
-// CHECK-LABEL: @test_vst4q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
vst4q_s16(a, b);
}
-// CHECK-LABEL: @test_vst4q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
vst4q_s32(a, b);
}
-// CHECK-LABEL: @test_vst4q_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
vst4q_s64(a, b);
}
-// CHECK-LABEL: @test_vst4q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
vst4q_f16(a, b);
}
-// CHECK-LABEL: @test_vst4q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
vst4q_f32(a, b);
}
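For the floating-point variants, the checks above show each <4 x float> first bitcast to the equal-width integer vector <4 x i32> (and <8 x half> to <8 x i16> in the f16 case) before the usual <16 x i8> round-trip, so the bitcast chain stays between integer vector types. Again a sketch only, not part of this patch, with a hypothetical function name:

#include <arm_neon.h>

// Hypothetical example for the float32 4-way interleaved store.
void store4_f32(float32_t *dst, float32x4_t a, float32x4_t b,
                float32x4_t c, float32x4_t d) {
  float32x4x4_t v = { { a, b, c, d } };  // four 4-lane float vectors
  vst4q_f32(dst, v);                     // interleaved store: a0,b0,c0,d0,...
}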
-// CHECK-LABEL: @test_vst4q_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
vst4q_f64(a, b);
}
-// CHECK-LABEL: @test_vst4q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
vst4q_p8(a, b);
}
-// CHECK-LABEL: @test_vst4q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
vst4q_p16(a, b);
}
-// CHECK-LABEL: @test_vst4_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
vst4_u8(a, b);
}
-// CHECK-LABEL: @test_vst4_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
vst4_u16(a, b);
}
-// CHECK-LABEL: @test_vst4_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
vst4_u32(a, b);
}
-// CHECK-LABEL: @test_vst4_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
vst4_u64(a, b);
}
-// CHECK-LABEL: @test_vst4_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
vst4_s8(a, b);
}
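The s8 hunk above shows the shape of the change most cleanly: byte-sized lanes need no reinterpreting bitcasts, so the old alloca + memcpy + per-field load sequence collapses to four extractvalue instructions feeding the st4 intrinsic directly. A minimal caller that exercises this path; the helper name is illustrative, not part of the test:

#include <arm_neon.h>

/* Interleaved store of four 8-lane byte planes; lowers to the
   llvm.aarch64.neon.st4.v8i8.p0 call checked above. */
static void store_planes(int8_t *dst, int8x8_t p0, int8x8_t p1,
                         int8x8_t p2, int8x8_t p3) {
  int8x8x4_t v = { { p0, p1, p2, p3 } };
  vst4_s8(dst, v);
}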
-// CHECK-LABEL: @test_vst4_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
vst4_s16(a, b);
}
-// CHECK-LABEL: @test_vst4_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
vst4_s32(a, b);
}
-// CHECK-LABEL: @test_vst4_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
vst4_s64(a, b);
}
-// CHECK-LABEL: @test_vst4_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
vst4_f16(a, b);
}
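Note the extra hop in the f16 hunk: each <4 x half> operand is first bitcast to the same-width integer vector <4 x i16> and only then to <8 x i8>, so floating-point lane types carry one more bitcast pair than the integer cases above. A hedged usage sketch, assuming an AArch64 clang target where float16_t is available by default (the function name is illustrative):

#include <arm_neon.h>

void store4_f16(float16_t *dst, float16x4_t v0, float16x4_t v1,
                float16x4_t v2, float16x4_t v3) {
  /* Interleaves the four vectors into memory; lowers to
     llvm.aarch64.neon.st4.v4f16.p0 as in the CHECK lines above. */
  float16x4x4_t v = { { v0, v1, v2, v3 } };
  vst4_f16(dst, v);
}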
-// CHECK-LABEL: @test_vst4_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
vst4_f32(a, b);
}
-// CHECK-LABEL: @test_vst4_f64(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_3_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_6_24_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[B_SROA_6_24_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
vst4_f64(a, b);
}
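The f64 hunk is the one irregular pattern: a <1 x double> lane is scalarized, so each element is bitcast to a plain i64 and reinserted into a fresh <1 x i64> (the B_SROA_*_VEC_INSERT lines, whose undef base is fully overwritten by the lane-0 insert) before the usual <8 x i8> round-trip. Usage is unchanged; a sketch with an illustrative name:

#include <arm_neon.h>

void store4_f64(float64_t *dst, float64x1_t d0, float64x1_t d1,
                float64x1_t d2, float64x1_t d3) {
  /* Each <1 x double> lane round-trips through i64 in the IR above. */
  float64x1x4_t v = { { d0, d1, d2, d3 } };
  vst4_f64(dst, v);
}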
-// CHECK-LABEL: @test_vst4_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
vst4_p8(a, b);
}
-// CHECK-LABEL: @test_vst4_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
vst4_p16(a, b);
}
-// CHECK-LABEL: @test_vld1q_f64_x2(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x2_t @test_vld1q_f64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T:%.*]] poison, <2 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
return vld1q_f64_x2(a);
}
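On the load side, the returned aggregate no longer touches memory: the ld1x2 intrinsic yields a literal struct { <2 x double>, <2 x double> }, which is unpacked with extractvalue and rebuilt into %struct.float64x2x2_t by an insertvalue chain seeded with poison, replacing the old pair of allocas plus memcpy. A small consumer, with an illustrative name:

#include <arm_neon.h>

float64x2_t sum_halves(const float64_t *p) {
  float64x2x2_t v = vld1q_f64_x2(p); /* single two-register ld1 load */
  return vaddq_f64(v.val[0], v.val[1]);
}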
-// CHECK-LABEL: @test_vld1q_p64_x2(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld1q_p64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T:%.*]] poison, <2 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
return vld1q_p64_x2(a);
}
-// CHECK-LABEL: @test_vld1_f64_x2(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x2_t @test_vld1_f64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
return vld1_f64_x2(a);
}
-// CHECK-LABEL: @test_vld1_p64_x2(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld1_p64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T:%.*]] poison, <1 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
return vld1_p64_x2(a);
}
-// CHECK-LABEL: @test_vld1q_f64_x3(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x3_t @test_vld1q_f64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T:%.*]] poison, <2 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
return vld1q_f64_x3(a);
}
-// CHECK-LABEL: @test_vld1q_p64_x3(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld1q_p64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T:%.*]] poison, <2 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
return vld1q_p64_x3(a);
}
-// CHECK-LABEL: @test_vld1_f64_x3(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x3_t @test_vld1_f64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
return vld1_f64_x3(a);
}
-// CHECK-LABEL: @test_vld1_p64_x3(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld1_p64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T:%.*]] poison, <1 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
return vld1_p64_x3(a);
}
-// CHECK-LABEL: @test_vld1q_f64_x4(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x4_t @test_vld1q_f64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T:%.*]] poison, <2 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x double> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
return vld1q_f64_x4(a);
}
-// CHECK-LABEL: @test_vld1q_p64_x4(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld1q_p64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T:%.*]] poison, <2 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
return vld1q_p64_x4(a);
}
-// CHECK-LABEL: @test_vld1_f64_x4(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x4_t @test_vld1_f64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x double> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
return vld1_f64_x4(a);
}
-// CHECK-LABEL: @test_vld1_p64_x4(
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld1_p64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T:%.*]] poison, <1 x i64> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
return vld1_p64_x4(a);
}
-// CHECK-LABEL: @test_vst1q_f64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP4]], <2 x double> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
vst1q_f64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_p64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
vst1q_p64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_f64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP4]], <1 x double> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
vst1_f64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_p64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p64_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
vst1_p64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_f64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
vst1q_f64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_p64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
vst1q_p64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_f64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP6]], <1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
vst1_f64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_p64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p64_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
vst1_p64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_f64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_f64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
vst1q_f64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_p64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
vst1q_p64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_f64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_f64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_3_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_6_24_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[B_SROA_6_24_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
vst1_f64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_p64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p64_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
vst1_p64_x4(a, b);
}
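As a usage reference for the ld1/st1 multi-vector forms exercised above, here is a minimal sketch (assuming only <arm_neon.h> on an AArch64 target; copy4 is a hypothetical helper, not part of this patch) of a round trip through the x4 forms:

#include <arm_neon.h>

/* Round-trips four <1 x double> registers through memory using the
   x4 load/store intrinsics whose IR is checked above. */
void copy4(const float64_t *src, float64_t *dst) {
  float64x1x4_t v = vld1_f64_x4(src); /* loads 4 consecutive d-registers */
  vst1_f64_x4(dst, v);                /* stores them back out            */
}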
-// CHECK-LABEL: @test_vceqd_s64(
-// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vceqd_s64(int64_t a, int64_t b) {
return (uint64_t)vceqd_s64(a, b);
}
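The sext-of-i1 in these checks is what turns each scalar compare into an all-ones/all-zeros mask; a minimal sketch of the observable behaviour (hypothetical check_vceqd driver, assuming ACLE comparison semantics):

#include <arm_neon.h>
#include <assert.h>

void check_vceqd(void) {
  assert(vceqd_s64(42, 42) == ~0ULL); /* equal   -> all ones  */
  assert(vceqd_s64(1, 2) == 0);       /* unequal -> all zeros */
}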
-// CHECK-LABEL: @test_vceqd_u64(
-// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vceqd_u64(a, b);
}
-// CHECK-LABEL: @test_vceqzd_s64(
-// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
-// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqzd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[A]], 0
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQZ_I]]
+//
uint64_t test_vceqzd_s64(int64_t a) {
return (uint64_t)vceqzd_s64(a);
}
-// CHECK-LABEL: @test_vceqzd_u64(
-// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
-// CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQZD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqzd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[A]], 0
+// CHECK-NEXT: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQZD_I]]
+//
uint64_t test_vceqzd_u64(uint64_t a) {
  return (uint64_t)vceqzd_u64(a);
}
-// CHECK-LABEL: @test_vcged_s64(
-// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcged_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sge i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcged_s64(int64_t a, int64_t b) {
return (uint64_t)vcged_s64(a, b);
}
-// CHECK-LABEL: @test_vcged_u64(
-// CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcged_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp uge i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
return (uint64_t)vcged_u64(a, b);
}
-// CHECK-LABEL: @test_vcgezd_s64(
-// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
-// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgezd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sge i64 [[A]], 0
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCGEZ_I]]
+//
uint64_t test_vcgezd_s64(int64_t a) {
return (uint64_t)vcgezd_s64(a);
}
-// CHECK-LABEL: @test_vcgtd_s64(
-// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgtd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcgtd_s64(int64_t a, int64_t b) {
return (uint64_t)vcgtd_s64(a, b);
}
-// CHECK-LABEL: @test_vcgtd_u64(
-// CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgtd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
return (uint64_t)vcgtd_u64(a, b);
}
-// CHECK-LABEL: @test_vcgtzd_s64(
-// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
-// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgtzd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[A]], 0
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCGTZ_I]]
+//
uint64_t test_vcgtzd_s64(int64_t a) {
return (uint64_t)vcgtzd_s64(a);
}
-// CHECK-LABEL: @test_vcled_s64(
-// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcled_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sle i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcled_s64(int64_t a, int64_t b) {
return (uint64_t)vcled_s64(a, b);
}
-// CHECK-LABEL: @test_vcled_u64(
-// CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcled_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
return (uint64_t)vcled_u64(a, b);
}
-// CHECK-LABEL: @test_vclezd_s64(
-// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
-// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vclezd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sle i64 [[A]], 0
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCLEZ_I]]
+//
uint64_t test_vclezd_s64(int64_t a) {
return (uint64_t)vclezd_s64(a);
}
-// CHECK-LABEL: @test_vcltd_s64(
-// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcltd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcltd_s64(int64_t a, int64_t b) {
return (uint64_t)vcltd_s64(a, b);
}
-// CHECK-LABEL: @test_vcltd_u64(
-// CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
-// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcltd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[A]], [[B]]
+// CHECK-NEXT: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQD_I]]
+//
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
return (uint64_t)vcltd_u64(a, b);
}
-// CHECK-LABEL: @test_vcltzd_s64(
-// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
-// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcltzd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[A]], 0
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCLTZ_I]]
+//
uint64_t test_vcltzd_s64(int64_t a) {
return (uint64_t)vcltzd_s64(a);
}
-// CHECK-LABEL: @test_vtstd_s64(
-// CHECK: [[TMP0:%.*]] = and i64 %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
-// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
-// CHECK: ret i64 [[VTSTD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vtstd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and i64 [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
+// CHECK-NEXT: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
+// CHECK-NEXT: ret i64 [[VTSTD_I]]
+//
uint64_t test_vtstd_s64(int64_t a, int64_t b) {
return (uint64_t)vtstd_s64(a, b);
}
-// CHECK-LABEL: @test_vtstd_u64(
-// CHECK: [[TMP0:%.*]] = and i64 %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
-// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
-// CHECK: ret i64 [[VTSTD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vtstd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and i64 [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
+// CHECK-NEXT: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
+// CHECK-NEXT: ret i64 [[VTSTD_I]]
+//
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
return (uint64_t)vtstd_u64(a, b);
}
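As the and/icmp/sext sequence shows, vtstd reduces to (a & b) != 0 widened to a mask; for example (hypothetical check_vtstd driver):

#include <arm_neon.h>
#include <assert.h>

void check_vtstd(void) {
  assert(vtstd_s64(0x0f, 0xf0) == 0);     /* no common bits -> all zeros */
  assert(vtstd_s64(0x18, 0x10) == ~0ULL); /* any common bit -> all ones  */
}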
-// CHECK-LABEL: @test_vabsd_s64(
-// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
-// CHECK: ret i64 [[VABSD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vabsd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 [[A]])
+// CHECK-NEXT: ret i64 [[VABSD_S64_I]]
+//
int64_t test_vabsd_s64(int64_t a) {
return (int64_t)vabsd_s64(a);
}
-// CHECK-LABEL: @test_vqabsb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqabsb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqabsb_s8(int8_t a) {
return (int8_t)vqabsb_s8(a);
}
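The i8 scalar form is lowered through lane 0 of a vector sqabs, as the insert/extractelement pair above shows; the saturating behaviour itself is simple to state (hypothetical check_vqabsb driver, assuming ACLE saturation semantics):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>

void check_vqabsb(void) {
  assert(vqabsb_s8(-5) == 5);              /* ordinary absolute value */
  assert(vqabsb_s8(INT8_MIN) == INT8_MAX); /* |-128| saturates to 127 */
}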
-// CHECK-LABEL: @test_vqabsh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqabsh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqabsh_s16(int16_t a) {
return (int16_t)vqabsh_s16(a);
}
-// CHECK-LABEL: @test_vqabss_s32(
-// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
-// CHECK: ret i32 [[VQABSS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqabss_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 [[A]])
+// CHECK-NEXT: ret i32 [[VQABSS_S32_I]]
+//
int32_t test_vqabss_s32(int32_t a) {
return (int32_t)vqabss_s32(a);
}
-// CHECK-LABEL: @test_vqabsd_s64(
-// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
-// CHECK: ret i64 [[VQABSD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqabsd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 [[A]])
+// CHECK-NEXT: ret i64 [[VQABSD_S64_I]]
+//
int64_t test_vqabsd_s64(int64_t a) {
return (int64_t)vqabsd_s64(a);
}
-// CHECK-LABEL: @test_vnegd_s64(
-// CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
-// CHECK: ret i64 [[VNEGD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vnegd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VNEGD_I:%.*]] = sub i64 0, [[A]]
+// CHECK-NEXT: ret i64 [[VNEGD_I]]
+//
int64_t test_vnegd_s64(int64_t a) {
return (int64_t)vnegd_s64(a);
}
-// CHECK-LABEL: @test_vqnegb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqnegb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqnegb_s8(int8_t a) {
return (int8_t)vqnegb_s8(a);
}
-// CHECK-LABEL: @test_vqnegh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqnegh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqnegh_s16(int16_t a) {
return (int16_t)vqnegh_s16(a);
}
-// CHECK-LABEL: @test_vqnegs_s32(
-// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
-// CHECK: ret i32 [[VQNEGS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqnegs_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 [[A]])
+// CHECK-NEXT: ret i32 [[VQNEGS_S32_I]]
+//
int32_t test_vqnegs_s32(int32_t a) {
return (int32_t)vqnegs_s32(a);
}
-// CHECK-LABEL: @test_vqnegd_s64(
-// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
-// CHECK: ret i64 [[VQNEGD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqnegd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 [[A]])
+// CHECK-NEXT: ret i64 [[VQNEGD_S64_I]]
+//
int64_t test_vqnegd_s64(int64_t a) {
return (int64_t)vqnegd_s64(a);
}
-// CHECK-LABEL: @test_vuqaddb_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vuqaddb_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
return (int8_t)vuqaddb_s8(a, b);
}
-// CHECK-LABEL: @test_vuqaddh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vuqaddh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
return (int16_t)vuqaddh_s16(a, b);
}
-// CHECK-LABEL: @test_vuqadds_s32(
-// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VUQADDS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vuqadds_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VUQADDS_S32_I]]
+//
int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
return (int32_t)vuqadds_s32(a, b);
}
-// CHECK-LABEL: @test_vuqaddd_s64(
-// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VUQADDD_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vuqaddd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VUQADDD_S64_I]]
+//
int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
return (int64_t)vuqaddd_s64(a, b);
}
-// CHECK-LABEL: @test_vsqaddb_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
-// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
-// CHECK: ret i8 [[TMP2]]
+// CHECK-LABEL: define dso_local i8 @test_vsqaddb_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// CHECK-NEXT: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP2]]
+//
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
return (uint8_t)vsqaddb_u8(a, b);
}
-// CHECK-LABEL: @test_vsqaddh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
-// CHECK: ret i16 [[TMP2]]
+// CHECK-LABEL: define dso_local i16 @test_vsqaddh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
return (uint16_t)vsqaddh_u16(a, b);
}
-// CHECK-LABEL: @test_vsqadds_u32(
-// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
-// CHECK: ret i32 [[VSQADDS_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vsqadds_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i32 [[VSQADDS_U32_I]]
+//
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
return (uint32_t)vsqadds_u32(a, b);
}
-// CHECK-LABEL: @test_vsqaddd_u64(
-// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
-// CHECK: ret i64 [[VSQADDD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vsqaddd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT: ret i64 [[VSQADDD_U64_I]]
+//
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
return (uint64_t)vsqaddd_u64(a, b);
}
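// Hypothetical usage sketch for the mixed-signedness accumulates above: the
// result saturates at the bounds of the first operand's type rather than
// wrapping.
static int8_t example_suqadd(void) {
  return vuqaddb_s8(100, 200); // 100 + 200 saturates to 127 (INT8_MAX)
}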
-// CHECK-LABEL: @test_vqdmlalh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
-// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
-// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
-// CHECK: ret i32 [[VQDMLXL1_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlalh_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
+// CHECK-NEXT: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
+// CHECK-NEXT: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[LANE0_I]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1_I]]
+//
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
return (int32_t)vqdmlalh_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlals_s32(
-// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
-// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
-// CHECK: ret i64 [[VQDMLXL1_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlals_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[C]])
+// CHECK-NEXT: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A]], i64 [[VQDMLXL_I]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1_I]]
+//
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
return (int64_t)vqdmlals_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlslh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
-// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
-// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
-// CHECK: ret i32 [[VQDMLXL1_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlslh_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
+// CHECK-NEXT: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
+// CHECK-NEXT: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[LANE0_I]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1_I]]
+//
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
return (int32_t)vqdmlslh_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsls_s32(
-// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
-// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
-// CHECK: ret i64 [[VQDMLXL1_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlsls_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[C]])
+// CHECK-NEXT: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A]], i64 [[VQDMLXL_I]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1_I]]
+//
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
return (int64_t)vqdmlsls_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmullh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
-// CHECK: ret i32 [[TMP2]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmullh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
return (int32_t)vqdmullh_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulls_s32(
-// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
-// CHECK: ret i64 [[VQDMULLS_S32_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmulls_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A]], i32 [[B]])
+// CHECK-NEXT: ret i64 [[VQDMULLS_S32_I]]
+//
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
return (int64_t)vqdmulls_s32(a, b);
}
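// Illustrative sketch of the doubling semantics: vqdmullh_s16 produces
// sat(2 * a * b) in the wider type, and the vqdmlal/vqdmlsl forms above then
// saturating-add/subtract that product into the accumulator, matching the
// sqdmull + sqadd/sqsub pairs in the checked IR. Hypothetical helper:
static int32_t example_qdmull(void) {
  return vqdmullh_s16(100, 200); // 2 * 100 * 200 = 40000
}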
-// CHECK-LABEL: @test_vqmovunh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqmovunh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
uint8_t test_vqmovunh_s16(int16_t a) {
return (uint8_t)vqmovunh_s16(a);
}
-// CHECK-LABEL: @test_vqmovuns_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqmovuns_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
uint16_t test_vqmovuns_s32(int32_t a) {
return (uint16_t)vqmovuns_s32(a);
}
-// CHECK-LABEL: @test_vqmovund_s64(
-// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
-// CHECK: ret i32 [[VQMOVUND_S64_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqmovund_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 [[A]])
+// CHECK-NEXT: ret i32 [[VQMOVUND_S64_I]]
+//
uint32_t test_vqmovund_s64(int64_t a) {
return (uint32_t)vqmovund_s64(a);
}
-// CHECK-LABEL: @test_vqmovnh_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqmovnh_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqmovnh_s16(int16_t a) {
return (int8_t)vqmovnh_s16(a);
}
-// CHECK-LABEL: @test_vqmovns_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqmovns_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqmovns_s32(int32_t a) {
return (int16_t)vqmovns_s32(a);
}
-// CHECK-LABEL: @test_vqmovnd_s64(
-// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
-// CHECK: ret i32 [[VQMOVND_S64_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqmovnd_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 [[A]])
+// CHECK-NEXT: ret i32 [[VQMOVND_S64_I]]
+//
int32_t test_vqmovnd_s64(int64_t a) {
return (int32_t)vqmovnd_s64(a);
}
-// CHECK-LABEL: @test_vqmovnh_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqmovnh_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqmovnh_u16(int16_t a) {
return (int8_t)vqmovnh_u16(a);
}
-// CHECK-LABEL: @test_vqmovns_u32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqmovns_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqmovns_u32(int32_t a) {
return (int16_t)vqmovns_u32(a);
}
-// CHECK-LABEL: @test_vqmovnd_u64(
-// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
-// CHECK: ret i32 [[VQMOVND_U64_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqmovnd_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 [[A]])
+// CHECK-NEXT: ret i32 [[VQMOVND_U64_I]]
+//
int32_t test_vqmovnd_u64(int64_t a) {
return (int32_t)vqmovnd_u64(a);
}
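// Hypothetical sketch of the narrowing behaviour checked above: vqmovn*
// saturates into the narrower type's range, while the vqmovun* variants
// narrow a signed input to unsigned, so negative values clamp to zero.
static uint8_t example_qmovun(void) {
  return vqmovunh_s16(-5); // 0: signed-to-unsigned narrow clamps at 0
}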
-// CHECK-LABEL: @test_vceqs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i32 @test_vceqs_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq float [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
return (uint32_t)vceqs_f32(a, b);
}
-// CHECK-LABEL: @test_vceqd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq double [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
return (uint64_t)vceqd_f64(a, b);
}
-// CHECK-LABEL: @test_vceqzs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
-// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local i32 @test_vceqzs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq float [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCEQZ_I]]
+//
uint32_t test_vceqzs_f32(float32_t a) {
return (uint32_t)vceqzs_f32(a);
}
-// CHECK-LABEL: @test_vceqzd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
-// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vceqzd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq double [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCEQZ_I]]
+//
uint64_t test_vceqzd_f64(float64_t a) {
return (uint64_t)vceqzd_f64(a);
}
-// CHECK-LABEL: @test_vcges_f32(
-// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcges_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge float [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcges_f32(float32_t a, float32_t b) {
return (uint32_t)vcges_f32(a, b);
}
-// CHECK-LABEL: @test_vcged_f64(
-// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcged_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge double [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcged_f64(float64_t a, float64_t b) {
return (uint64_t)vcged_f64(a, b);
}
-// CHECK-LABEL: @test_vcgezs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
-// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcgezs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge float [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCGEZ_I]]
+//
uint32_t test_vcgezs_f32(float32_t a) {
return (uint32_t)vcgezs_f32(a);
}
-// CHECK-LABEL: @test_vcgezd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
-// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgezd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp oge double [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCGEZ_I]]
+//
uint64_t test_vcgezd_f64(float64_t a) {
return (uint64_t)vcgezd_f64(a);
}
-// CHECK-LABEL: @test_vcgts_f32(
-// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcgts_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt float [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
return (uint32_t)vcgts_f32(a, b);
}
-// CHECK-LABEL: @test_vcgtd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgtd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt double [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
return (uint64_t)vcgtd_f64(a, b);
}
-// CHECK-LABEL: @test_vcgtzs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
-// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcgtzs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt float [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCGTZ_I]]
+//
uint32_t test_vcgtzs_f32(float32_t a) {
return (uint32_t)vcgtzs_f32(a);
}
-// CHECK-LABEL: @test_vcgtzd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
-// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcgtzd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ogt double [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCGTZ_I]]
+//
uint64_t test_vcgtzd_f64(float64_t a) {
return (uint64_t)vcgtzd_f64(a);
}
-// CHECK-LABEL: @test_vcles_f32(
-// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcles_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole float [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vcles_f32(float32_t a, float32_t b) {
return (uint32_t)vcles_f32(a, b);
}
-// CHECK-LABEL: @test_vcled_f64(
-// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcled_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole double [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcled_f64(float64_t a, float64_t b) {
return (uint64_t)vcled_f64(a, b);
}
-// CHECK-LABEL: @test_vclezs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
-// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local i32 @test_vclezs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole float [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCLEZ_I]]
+//
uint32_t test_vclezs_f32(float32_t a) {
return (uint32_t)vclezs_f32(a);
}
-// CHECK-LABEL: @test_vclezd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
-// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vclezd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole double [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCLEZ_I]]
+//
uint64_t test_vclezd_f64(float64_t a) {
return (uint64_t)vclezd_f64(a);
}
-// CHECK-LABEL: @test_vclts_f32(
-// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i32 @test_vclts_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt float [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCMPD_I]]
+//
uint32_t test_vclts_f32(float32_t a, float32_t b) {
return (uint32_t)vclts_f32(a, b);
}
-// CHECK-LABEL: @test_vcltd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
-// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCMPD_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcltd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt double [[A]], [[B]]
+// CHECK-NEXT: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCMPD_I]]
+//
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
return (uint64_t)vcltd_f64(a, b);
}
-// CHECK-LABEL: @test_vcltzs_f32(
-// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
-// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
-// CHECK: ret i32 [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcltzs_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt float [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[VCLTZ_I]]
+//
uint32_t test_vcltzs_f32(float32_t a) {
return (uint32_t)vcltzs_f32(a);
}
-// CHECK-LABEL: @test_vcltzd_f64(
-// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
-// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK: ret i64 [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcltzd_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt double [[A]], 0.000000e+00
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[VCLTZ_I]]
+//
uint64_t test_vcltzd_f64(float64_t a) {
return (uint64_t)vcltzd_f64(a);
}
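// As the sext-of-i1 pattern above shows, the scalar FP compares return an
// all-ones mask on success rather than 1. A minimal sketch, assuming the
// usual arm_neon.h definitions:
static uint32_t example_fcmp_mask(void) {
  return vcges_f32(2.0f, 1.0f); // 0xFFFFFFFF (all ones), not 1
}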
-// CHECK-LABEL: @test_vcages_f32(
-// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
-// CHECK: ret i32 [[VCAGES_F32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcages_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret i32 [[VCAGES_F32_I]]
+//
uint32_t test_vcages_f32(float32_t a, float32_t b) {
return (uint32_t)vcages_f32(a, b);
}
-// CHECK-LABEL: @test_vcaged_f64(
-// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
-// CHECK: ret i64 [[VCAGED_F64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcaged_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret i64 [[VCAGED_F64_I]]
+//
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
return (uint64_t)vcaged_f64(a, b);
}
-// CHECK-LABEL: @test_vcagts_f32(
-// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
-// CHECK: ret i32 [[VCAGTS_F32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcagts_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret i32 [[VCAGTS_F32_I]]
+//
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
return (uint32_t)vcagts_f32(a, b);
}
-// CHECK-LABEL: @test_vcagtd_f64(
-// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
-// CHECK: ret i64 [[VCAGTD_F64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcagtd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret i64 [[VCAGTD_F64_I]]
+//
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
return (uint64_t)vcagtd_f64(a, b);
}
-// CHECK-LABEL: @test_vcales_f32(
-// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
-// CHECK: ret i32 [[VCALES_F32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcales_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float [[B]], float [[A]])
+// CHECK-NEXT: ret i32 [[VCALES_F32_I]]
+//
uint32_t test_vcales_f32(float32_t a, float32_t b) {
return (uint32_t)vcales_f32(a, b);
}
-// CHECK-LABEL: @test_vcaled_f64(
-// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
-// CHECK: ret i64 [[VCALED_F64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcaled_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double [[B]], double [[A]])
+// CHECK-NEXT: ret i64 [[VCALED_F64_I]]
+//
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
return (uint64_t)vcaled_f64(a, b);
}
-// CHECK-LABEL: @test_vcalts_f32(
-// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
-// CHECK: ret i32 [[VCALTS_F32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vcalts_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float [[B]], float [[A]])
+// CHECK-NEXT: ret i32 [[VCALTS_F32_I]]
+//
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
return (uint32_t)vcalts_f32(a, b);
}
-// CHECK-LABEL: @test_vcaltd_f64(
-// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
-// CHECK: ret i64 [[VCALTD_F64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vcaltd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double [[B]], double [[A]])
+// CHECK-NEXT: ret i64 [[VCALTD_F64_I]]
+//
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
return (uint64_t)vcaltd_f64(a, b);
}
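// The absolute compares vcale/vcalt have no dedicated instruction, so they
// are emitted as facge/facgt with the operands swapped, as the [[B]]/[[A]]
// ordering above shows: |a| <= |b| is tested as |b| >= |a|. Hypothetical
// example:
static uint32_t example_facle(void) {
  return vcales_f32(-1.0f, 2.0f); // all ones: |-1.0f| <= |2.0f|
}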
-// CHECK-LABEL: @test_vshrd_n_s64(
-// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
-// CHECK: ret i64 [[SHRD_N]]
+// CHECK-LABEL: define dso_local i64 @test_vshrd_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHRD_N:%.*]] = ashr i64 [[A]], 1
+// CHECK-NEXT: ret i64 [[SHRD_N]]
+//
int64_t test_vshrd_n_s64(int64_t a) {
return (int64_t)vshrd_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshr_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHR_N]]
+//
int64x1_t test_vshr_n_s64(int64x1_t a) {
return vshr_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshrd_n_u64(
-// CHECK: ret i64 0
+// CHECK-LABEL: define dso_local i64 @test_vshrd_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret i64 0
+//
uint64_t test_vshrd_n_u64(uint64_t a) {
return (uint64_t)vshrd_n_u64(a, 64);
}
-// CHECK-LABEL: @test_vshrd_n_u64_2(
-// CHECK: ret i64 0
+// CHECK-LABEL: define dso_local i64 @test_vshrd_n_u64_2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret i64 0
+//
uint64_t test_vshrd_n_u64_2() {
uint64_t a = UINT64_C(0xf000000000000000);
return vshrd_n_u64(a, 64);
}
-// CHECK-LABEL: @test_vshr_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHR_N]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshr_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHR_N]]
+//
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
return vshr_n_u64(a, 1);
}
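// Sketch of the shift-by-64 edge case constant-folded above: the immediate
// ranges over [1, 64], and an unsigned right shift by 64 is defined to
// produce 0 (the signed form, by contrast, ends up replicating the sign
// bit). Hypothetical helper:
static uint64_t example_ushr64(uint64_t a) {
  return vshrd_n_u64(a, 64); // always 0
}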
-// CHECK-LABEL: @test_vrshrd_n_s64(
-// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
-// CHECK: ret i64 [[VRSHR_N]]
+// CHECK-LABEL: define dso_local i64 @test_vrshrd_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[A]], i64 -63)
+// CHECK-NEXT: ret i64 [[VRSHR_N]]
+//
int64_t test_vrshrd_n_s64(int64_t a) {
return (int64_t)vrshrd_n_s64(a, 63);
}
-// CHECK-LABEL: @test_vrshr_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrshr_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VRSHR_N1]]
+//
int64x1_t test_vrshr_n_s64(int64x1_t a) {
return vrshr_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vrshrd_n_u64(
-// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
-// CHECK: ret i64 [[VRSHR_N]]
+// CHECK-LABEL: define dso_local i64 @test_vrshrd_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[A]], i64 -63)
+// CHECK-NEXT: ret i64 [[VRSHR_N]]
+//
uint64_t test_vrshrd_n_u64(uint64_t a) {
return (uint64_t)vrshrd_n_u64(a, 63);
}
-// CHECK-LABEL: @test_vrshr_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrshr_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VRSHR_N1]]
+//
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
return vrshr_n_u64(a, 1);
}
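// Rounding-shift sketch (illustrative only): vrshrd_n_s64(a, n) lowers to
// srshl(a, -n), a rounding shift by a negative amount, which adds 2^(n-1)
// before shifting.
static int64_t example_rshr(void) {
  return vrshrd_n_s64(3, 1); // (3 + 1) >> 1 = 2, vs. plain 3 >> 1 = 1
}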
-// CHECK-LABEL: @test_vsrad_n_s64(
-// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
-// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
-// CHECK: ret i64 [[TMP0]]
+// CHECK-LABEL: define dso_local i64 @test_vsrad_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHRD_N:%.*]] = ashr i64 [[B]], 63
+// CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A]], [[SHRD_N]]
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
return (int64_t)vsrad_n_s64(a, b, 63);
}
-// CHECK-LABEL: @test_vsra_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <1 x i64> [[TMP4]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsra_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <1 x i64> [[TMP4]]
+//
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
return vsra_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsrad_n_u64(
-// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
-// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
-// CHECK: ret i64 [[TMP0]]
+// CHECK-LABEL: define dso_local i64 @test_vsrad_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHRD_N:%.*]] = lshr i64 [[B]], 63
+// CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A]], [[SHRD_N]]
+// CHECK-NEXT: ret i64 [[TMP0]]
+//
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
return (uint64_t)vsrad_n_u64(a, b, 63);
}
-// CHECK-LABEL: @test_vsrad_n_u64_2(
-// CHECK: ret i64 %a
+// CHECK-LABEL: define dso_local i64 @test_vsrad_n_u64_2(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret i64 [[A]]
+//
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
return (uint64_t)vsrad_n_u64(a, b, 64);
}
-// CHECK-LABEL: @test_vsra_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <1 x i64> [[TMP4]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsra_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <1 x i64> [[TMP4]]
+//
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
return vsra_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsrad_n_s64(
-// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
-// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
-// CHECK: ret i64 [[TMP1]]
+// CHECK-LABEL: define dso_local i64 @test_vrsrad_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[B]], i64 -63)
+// CHECK-NEXT: [[TMP1:%.*]] = add i64 [[A]], [[TMP0]]
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
return (int64_t)vrsrad_n_s64(a, b, 63);
}
-// CHECK-LABEL: @test_vrsra_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <1 x i64> [[TMP3]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrsra_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <1 x i64> [[TMP3]]
+//
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
return vrsra_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsrad_n_u64(
-// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
-// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
-// CHECK: ret i64 [[TMP1]]
+// CHECK-LABEL: define dso_local i64 @test_vrsrad_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[B]], i64 -63)
+// CHECK-NEXT: [[TMP1:%.*]] = add i64 [[A]], [[TMP0]]
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
return (uint64_t)vrsrad_n_u64(a, b, 63);
}
-// CHECK-LABEL: @test_vrsra_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// CHECK: ret <1 x i64> [[TMP3]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vrsra_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+// CHECK-NEXT: ret <1 x i64> [[TMP3]]
+//
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
return vrsra_n_u64(a, b, 1);
}
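// Accumulating-shift sketch: vsrad_n_s64(a, b, n) computes a + (b >> n),
// which is why the checks above show a plain ashr/add pair with no
// intrinsic call left. Hypothetical example:
static int64_t example_sra(void) {
  return vsrad_n_s64(10, -8, 2); // 10 + (-8 >> 2) = 10 + -2 = 8
}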
-// CHECK-LABEL: @test_vshld_n_s64(
-// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
-// CHECK: ret i64 [[SHLD_N]]
+// CHECK-LABEL: define dso_local i64 @test_vshld_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHLD_N:%.*]] = shl i64 [[A]], 1
+// CHECK-NEXT: ret i64 [[SHLD_N]]
+//
int64_t test_vshld_n_s64(int64_t a) {
return (int64_t)vshld_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
+//
int64x1_t test_vshl_n_s64(int64x1_t a) {
return vshl_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshld_n_u64(
-// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
-// CHECK: ret i64 [[SHLD_N]]
+// CHECK-LABEL: define dso_local i64 @test_vshld_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHLD_N:%.*]] = shl i64 [[A]], 63
+// CHECK-NEXT: ret i64 [[SHLD_N]]
+//
uint64_t test_vshld_n_u64(uint64_t a) {
return (uint64_t)vshld_n_u64(a, 63);
}
-// CHECK-LABEL: @test_vshl_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHL_N]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
+//
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
return vshl_n_u64(a, 1);
}
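// Left-shift sketch: vshld_n_u64 shifts in zeros with no saturation, so bits
// moved past bit 63 are simply lost; contrast the saturating vqshl forms
// below, which clamp instead. Hypothetical example:
static uint64_t example_shl(void) {
  return vshld_n_u64(3, 63); // 0x8000000000000000: only the low bit survives
}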
-// CHECK-LABEL: @test_vqshlb_n_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshlb_n_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqshlb_n_s8(int8_t a) {
return (int8_t)vqshlb_n_s8(a, 7);
}
-// CHECK-LABEL: @test_vqshlh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshlh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqshlh_n_s16(int16_t a) {
return (int16_t)vqshlh_n_s16(a, 15);
}
-// CHECK-LABEL: @test_vqshls_n_s32(
-// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
-// CHECK: ret i32 [[VQSHLS_N_S32]]
+// CHECK-LABEL: define dso_local i32 @test_vqshls_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 [[A]], i32 31)
+// CHECK-NEXT: ret i32 [[VQSHLS_N_S32]]
+//
int32_t test_vqshls_n_s32(int32_t a) {
return (int32_t)vqshls_n_s32(a, 31);
}
-// CHECK-LABEL: @test_vqshld_n_s64(
-// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
-// CHECK: ret i64 [[VQSHL_N]]
+// CHECK-LABEL: define dso_local i64 @test_vqshld_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[A]], i64 63)
+// CHECK-NEXT: ret i64 [[VQSHL_N]]
+//
int64_t test_vqshld_n_s64(int64_t a) {
return (int64_t)vqshld_n_s64(a, 63);
}
-// CHECK-LABEL: @test_vqshl_n_s8(
-// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
-// CHECK: ret <8 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshl_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[A]], <8 x i8> zeroinitializer)
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_N]]
+//
int8x8_t test_vqshl_n_s8(int8x8_t a) {
return vqshl_n_s8(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_s8(
-// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
-// CHECK: ret <16 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshlq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[A]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[VQSHL_N]]
+//
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
return vqshlq_n_s8(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
-// CHECK: ret <4 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshl_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
+// CHECK-NEXT: ret <4 x i16> [[VQSHL_N1]]
+//
int16x4_t test_vqshl_n_s16(int16x4_t a) {
return vqshl_n_s16(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
-// CHECK: ret <8 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshlq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[VQSHL_N1]]
+//
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
return vqshlq_n_s16(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
-// CHECK: ret <2 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshl_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
+// CHECK-NEXT: ret <2 x i32> [[VQSHL_N1]]
+//
int32x2_t test_vqshl_n_s32(int32x2_t a) {
return vqshl_n_s32(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
-// CHECK: ret <4 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshlq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[VQSHL_N1]]
+//
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
return vqshlq_n_s32(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
-// CHECK: ret <2 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqshlq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
+// CHECK-NEXT: ret <2 x i64> [[VQSHL_N1]]
+//
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
return vqshlq_n_s64(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_u8(
-// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
-// CHECK: ret <8 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqshl_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[A]], <8 x i8> zeroinitializer)
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_N]]
+//
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
return vqshl_n_u8(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_u8(
-// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
-// CHECK: ret <16 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqshlq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[A]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[VQSHL_N]]
+//
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
return vqshlq_n_u8(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
-// CHECK: ret <4 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqshl_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
+// CHECK-NEXT: ret <4 x i16> [[VQSHL_N1]]
+//
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
return vqshl_n_u16(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
-// CHECK: ret <8 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqshlq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[VQSHL_N1]]
+//
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
return vqshlq_n_u16(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
-// CHECK: ret <2 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqshl_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
+// CHECK-NEXT: ret <2 x i32> [[VQSHL_N1]]
+//
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
return vqshl_n_u32(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
-// CHECK: ret <4 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqshlq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[VQSHL_N1]]
+//
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
return vqshlq_n_u32(a, 0);
}
-// CHECK-LABEL: @test_vqshlq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
-// CHECK: ret <2 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqshlq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
+// CHECK-NEXT: ret <2 x i64> [[VQSHL_N1]]
+//
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
return vqshlq_n_u64(a, 0);
}
-// CHECK-LABEL: @test_vqshl_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqshl_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHL_N1]]
+//
int64x1_t test_vqshl_n_s64(int64x1_t a) {
return vqshl_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshlb_n_u8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshlb_n_u8(
+// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
uint8_t test_vqshlb_n_u8(uint8_t a) {
return (uint8_t)vqshlb_n_u8(a, 7);
}
-// CHECK-LABEL: @test_vqshlh_n_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshlh_n_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
uint16_t test_vqshlh_n_u16(uint16_t a) {
return (uint16_t)vqshlh_n_u16(a, 15);
}
-// CHECK-LABEL: @test_vqshls_n_u32(
-// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
-// CHECK: ret i32 [[VQSHLS_N_U32]]
+// CHECK-LABEL: define dso_local i32 @test_vqshls_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 [[A]], i32 31)
+// CHECK-NEXT: ret i32 [[VQSHLS_N_U32]]
+//
uint32_t test_vqshls_n_u32(uint32_t a) {
return (uint32_t)vqshls_n_u32(a, 31);
}
-// CHECK-LABEL: @test_vqshld_n_u64(
-// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
-// CHECK: ret i64 [[VQSHL_N]]
+// CHECK-LABEL: define dso_local i64 @test_vqshld_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[A]], i64 63)
+// CHECK-NEXT: ret i64 [[VQSHL_N]]
+//
uint64_t test_vqshld_n_u64(uint64_t a) {
return (uint64_t)vqshld_n_u64(a, 63);
}
-// CHECK-LABEL: @test_vqshl_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqshl_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHL_N1]]
+//
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
return vqshl_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vqshlub_n_s8(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
-// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshlub_n_s8(
+// CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqshlub_n_s8(int8_t a) {
return (int8_t)vqshlub_n_s8(a, 7);
}
-// CHECK-LABEL: @test_vqshluh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshluh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqshluh_n_s16(int16_t a) {
return (int16_t)vqshluh_n_s16(a, 15);
}
-// CHECK-LABEL: @test_vqshlus_n_s32(
-// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
-// CHECK: ret i32 [[VQSHLUS_N_S32]]
+// CHECK-LABEL: define dso_local i32 @test_vqshlus_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[A]], i32 31)
+// CHECK-NEXT: ret i32 [[VQSHLUS_N_S32]]
+//
int32_t test_vqshlus_n_s32(int32_t a) {
return (int32_t)vqshlus_n_s32(a, 31);
}
-// CHECK-LABEL: @test_vqshlud_n_s64(
-// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
-// CHECK: ret i64 [[VQSHLU_N]]
+// CHECK-LABEL: define dso_local i64 @test_vqshlud_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[A]], i64 63)
+// CHECK-NEXT: ret i64 [[VQSHLU_N]]
+//
int64_t test_vqshlud_n_s64(int64_t a) {
return (int64_t)vqshlud_n_s64(a, 63);
}
-// CHECK-LABEL: @test_vqshlu_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHLU_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqshlu_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHLU_N1]]
+//
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
return vqshlu_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vsrid_n_s64(
-// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
-// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
-// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
-// CHECK: ret i64 [[VSRID_N_S643]]
+// CHECK-LABEL: define dso_local i64 @test_vsrid_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRID_N_S64:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[VSRID_N_S641:%.*]] = bitcast i64 [[B]] to <1 x i64>
+// CHECK-NEXT: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
+// CHECK-NEXT: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
+// CHECK-NEXT: ret i64 [[VSRID_N_S643]]
+//
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
return (int64_t)vsrid_n_s64(a, b, 63);
}
-// CHECK-LABEL: @test_vsri_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
-// CHECK: ret <1 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsri_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
+// CHECK-NEXT: ret <1 x i64> [[VSRI_N2]]
+//
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
return vsri_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsrid_n_u64(
-// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
-// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
-// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
-// CHECK: ret i64 [[VSRID_N_U643]]
+// CHECK-LABEL: define dso_local i64 @test_vsrid_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRID_N_U64:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[VSRID_N_U641:%.*]] = bitcast i64 [[B]] to <1 x i64>
+// CHECK-NEXT: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
+// CHECK-NEXT: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
+// CHECK-NEXT: ret i64 [[VSRID_N_U643]]
+//
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
return (uint64_t)vsrid_n_u64(a, b, 63);
}
-// CHECK-LABEL: @test_vsri_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
-// CHECK: ret <1 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsri_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
+// CHECK-NEXT: ret <1 x i64> [[VSRI_N2]]
+//
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
return vsri_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vslid_n_s64(
-// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
-// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
-// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
-// CHECK: ret i64 [[VSLID_N_S643]]
+// CHECK-LABEL: define dso_local i64 @test_vslid_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLID_N_S64:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[VSLID_N_S641:%.*]] = bitcast i64 [[B]] to <1 x i64>
+// CHECK-NEXT: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
+// CHECK-NEXT: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
+// CHECK-NEXT: ret i64 [[VSLID_N_S643]]
+//
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
return (int64_t)vslid_n_s64(a, b, 63);
}
-// CHECK-LABEL: @test_vsli_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsli_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
return vsli_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vslid_n_u64(
-// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
-// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
-// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
-// CHECK: ret i64 [[VSLID_N_U643]]
+// CHECK-LABEL: define dso_local i64 @test_vslid_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLID_N_U64:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[VSLID_N_U641:%.*]] = bitcast i64 [[B]] to <1 x i64>
+// CHECK-NEXT: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
+// CHECK-NEXT: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
+// CHECK-NEXT: ret i64 [[VSLID_N_U643]]
+//
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
return (uint64_t)vslid_n_u64(a, b, 63);
}
-// CHECK-LABEL: @test_vsli_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsli_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
return vsli_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vqshrnh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshrnh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqshrnh_n_s16(int16_t a) {
return (int8_t)vqshrnh_n_s16(a, 8);
}
-// CHECK-LABEL: @test_vqshrns_n_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshrns_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqshrns_n_s32(int32_t a) {
return (int16_t)vqshrns_n_s32(a, 16);
}
-// CHECK-LABEL: @test_vqshrnd_n_s64(
-// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQSHRND_N_S64]]
+// CHECK-LABEL: define dso_local i32 @test_vqshrnd_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQSHRND_N_S64]]
+//
int32_t test_vqshrnd_n_s64(int64_t a) {
return (int32_t)vqshrnd_n_s64(a, 32);
}
-// CHECK-LABEL: @test_vqshrnh_n_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshrnh_n_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
uint8_t test_vqshrnh_n_u16(uint16_t a) {
return (uint8_t)vqshrnh_n_u16(a, 8);
}
-// CHECK-LABEL: @test_vqshrns_n_u32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshrns_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
uint16_t test_vqshrns_n_u32(uint32_t a) {
return (uint16_t)vqshrns_n_u32(a, 16);
}
-// CHECK-LABEL: @test_vqshrnd_n_u64(
-// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQSHRND_N_U64]]
+// CHECK-LABEL: define dso_local i32 @test_vqshrnd_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQSHRND_N_U64]]
+//
uint32_t test_vqshrnd_n_u64(uint64_t a) {
return (uint32_t)vqshrnd_n_u64(a, 32);
}
-// CHECK-LABEL: @test_vqrshrnh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqrshrnh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqrshrnh_n_s16(int16_t a) {
return (int8_t)vqrshrnh_n_s16(a, 8);
}
-// CHECK-LABEL: @test_vqrshrns_n_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqrshrns_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqrshrns_n_s32(int32_t a) {
return (int16_t)vqrshrns_n_s32(a, 16);
}
-// CHECK-LABEL: @test_vqrshrnd_n_s64(
-// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQRSHRND_N_S64]]
+// CHECK-LABEL: define dso_local i32 @test_vqrshrnd_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQRSHRND_N_S64]]
+//
int32_t test_vqrshrnd_n_s64(int64_t a) {
return (int32_t)vqrshrnd_n_s64(a, 32);
}
-// CHECK-LABEL: @test_vqrshrnh_n_u16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqrshrnh_n_u16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
return (uint8_t)vqrshrnh_n_u16(a, 8);
}
-// CHECK-LABEL: @test_vqrshrns_n_u32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqrshrns_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
uint16_t test_vqrshrns_n_u32(uint32_t a) {
return (uint16_t)vqrshrns_n_u32(a, 16);
}
-// CHECK-LABEL: @test_vqrshrnd_n_u64(
-// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQRSHRND_N_U64]]
+// CHECK-LABEL: define dso_local i32 @test_vqrshrnd_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQRSHRND_N_U64]]
+//
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
return (uint32_t)vqrshrnd_n_u64(a, 32);
}
-// CHECK-LABEL: @test_vqshrunh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqshrunh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
int8_t test_vqshrunh_n_s16(int16_t a) {
return (int8_t)vqshrunh_n_s16(a, 8);
}
-// CHECK-LABEL: @test_vqshruns_n_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqshruns_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
int16_t test_vqshruns_n_s32(int32_t a) {
return (int16_t)vqshruns_n_s32(a, 16);
}
-// CHECK-LABEL: @test_vqshrund_n_s64(
-// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQSHRUND_N_S64]]
+// CHECK-LABEL: define dso_local i32 @test_vqshrund_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQSHRUND_N_S64]]
+//
int32_t test_vqshrund_n_s64(int64_t a) {
return (int32_t)vqshrund_n_s64(a, 32);
}
-// CHECK-LABEL: @test_vqrshrunh_n_s16(
-// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
-// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
-// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
-// CHECK: ret i8 [[TMP1]]
+// CHECK-LABEL: define dso_local i8 @test_vqrshrunh_n_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
uint8_t test_vqrshrunh_n_s16(int16_t a) {
return (uint8_t)vqrshrunh_n_s16(a, 8);
}
-// CHECK-LABEL: @test_vqrshruns_n_s32(
-// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
-// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
-// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
-// CHECK: ret i16 [[TMP1]]
+// CHECK-LABEL: define dso_local i16 @test_vqrshruns_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+// CHECK-NEXT: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
+// CHECK-NEXT: ret i16 [[TMP1]]
+//
uint16_t test_vqrshruns_n_s32(int32_t a) {
return (uint16_t)vqrshruns_n_s32(a, 16);
}
-// CHECK-LABEL: @test_vqrshrund_n_s64(
-// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
-// CHECK: ret i32 [[VQRSHRUND_N_S64]]
+// CHECK-LABEL: define dso_local i32 @test_vqrshrund_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VQRSHRUND_N_S64]]
+//
uint32_t test_vqrshrund_n_s64(int64_t a) {
return (uint32_t)vqrshrund_n_s64(a, 32);
}
-// CHECK-LABEL: @test_vcvts_n_f32_s32(
-// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
-// CHECK: ret float [[VCVTS_N_F32_S32]]
+// CHECK-LABEL: define dso_local float @test_vcvts_n_f32_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 [[A]], i32 1)
+// CHECK-NEXT: ret float [[VCVTS_N_F32_S32]]
+//
float32_t test_vcvts_n_f32_s32(int32_t a) {
return vcvts_n_f32_s32(a, 1);
}
-// CHECK-LABEL: @test_vcvtd_n_f64_s64(
-// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
-// CHECK: ret double [[VCVTD_N_F64_S64]]
+// CHECK-LABEL: define dso_local double @test_vcvtd_n_f64_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 [[A]], i32 1)
+// CHECK-NEXT: ret double [[VCVTD_N_F64_S64]]
+//
float64_t test_vcvtd_n_f64_s64(int64_t a) {
return vcvtd_n_f64_s64(a, 1);
}
-// CHECK-LABEL: @test_vcvts_n_f32_u32(
-// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
-// CHECK: ret float [[VCVTS_N_F32_U32]]
+// CHECK-LABEL: define dso_local float @test_vcvts_n_f32_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 [[A]], i32 32)
+// CHECK-NEXT: ret float [[VCVTS_N_F32_U32]]
+//
float32_t test_vcvts_n_f32_u32(uint32_t a) {
return vcvts_n_f32_u32(a, 32);
}
-// CHECK-LABEL: @test_vcvtd_n_f64_u64(
-// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
-// CHECK: ret double [[VCVTD_N_F64_U64]]
+// CHECK-LABEL: define dso_local double @test_vcvtd_n_f64_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 [[A]], i32 64)
+// CHECK-NEXT: ret double [[VCVTD_N_F64_U64]]
+//
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
return vcvtd_n_f64_u64(a, 64);
}
-// CHECK-LABEL: @test_vcvts_n_s32_f32(
-// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
-// CHECK: ret i32 [[VCVTS_N_S32_F32]]
+// CHECK-LABEL: define dso_local i32 @test_vcvts_n_s32_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float [[A]], i32 1)
+// CHECK-NEXT: ret i32 [[VCVTS_N_S32_F32]]
+//
int32_t test_vcvts_n_s32_f32(float32_t a) {
return (int32_t)vcvts_n_s32_f32(a, 1);
}
-// CHECK-LABEL: @test_vcvtd_n_s64_f64(
-// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
-// CHECK: ret i64 [[VCVTD_N_S64_F64]]
+// CHECK-LABEL: define dso_local i64 @test_vcvtd_n_s64_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double [[A]], i32 1)
+// CHECK-NEXT: ret i64 [[VCVTD_N_S64_F64]]
+//
int64_t test_vcvtd_n_s64_f64(float64_t a) {
return (int64_t)vcvtd_n_s64_f64(a, 1);
}
-// CHECK-LABEL: @test_vcvts_n_u32_f32(
-// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
-// CHECK: ret i32 [[VCVTS_N_U32_F32]]
+// CHECK-LABEL: define dso_local i32 @test_vcvts_n_u32_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float [[A]], i32 32)
+// CHECK-NEXT: ret i32 [[VCVTS_N_U32_F32]]
+//
uint32_t test_vcvts_n_u32_f32(float32_t a) {
return (uint32_t)vcvts_n_u32_f32(a, 32);
}
-// CHECK-LABEL: @test_vcvtd_n_u64_f64(
-// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
-// CHECK: ret i64 [[VCVTD_N_U64_F64]]
+// CHECK-LABEL: define dso_local i64 @test_vcvtd_n_u64_f64(
+// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double [[A]], i32 64)
+// CHECK-NEXT: ret i64 [[VCVTD_N_U64_F64]]
+//
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
-// CHECK-LABEL: @test_vreinterpret_s8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
return vreinterpret_s8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
return vreinterpret_s8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
return vreinterpret_s8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
return vreinterpret_s8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
return vreinterpret_s8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
return vreinterpret_s8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
return vreinterpret_s8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
return vreinterpret_s8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
return vreinterpret_s8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
return vreinterpret_s8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_p8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
return vreinterpret_s8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
return vreinterpret_s8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
return vreinterpret_s8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
return vreinterpret_s16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
return vreinterpret_s16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
return vreinterpret_s16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
return vreinterpret_s16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
return vreinterpret_s16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
return vreinterpret_s16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
return vreinterpret_s16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
return vreinterpret_s16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
return vreinterpret_s16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
return vreinterpret_s16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
return vreinterpret_s16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_p16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
return vreinterpret_s16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_s16_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
return vreinterpret_s16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
return vreinterpret_s32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
return vreinterpret_s32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
return vreinterpret_s32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
return vreinterpret_s32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
return vreinterpret_s32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u32(
-// CHECK: ret <2 x i32> %a
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i32> [[A]]
+//
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
return vreinterpret_s32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
return vreinterpret_s32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
return vreinterpret_s32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
return vreinterpret_s32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
return vreinterpret_s32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
return vreinterpret_s32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
return vreinterpret_s32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_s32_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
return vreinterpret_s32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
return vreinterpret_s64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
return vreinterpret_s64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
return vreinterpret_s64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
return vreinterpret_s64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
return vreinterpret_s64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
return vreinterpret_s64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
return vreinterpret_s64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
return vreinterpret_s64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
return vreinterpret_s64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
return vreinterpret_s64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
return vreinterpret_s64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
return vreinterpret_s64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_p64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_s64_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
return vreinterpret_s64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
return vreinterpret_u8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
return vreinterpret_u8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
return vreinterpret_u8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
return vreinterpret_u8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
return vreinterpret_u8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
return vreinterpret_u8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
return vreinterpret_u8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
return vreinterpret_u8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
return vreinterpret_u8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
return vreinterpret_u8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_p8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
return vreinterpret_u8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
return vreinterpret_u8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_u8_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
return vreinterpret_u8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
return vreinterpret_u16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
return vreinterpret_u16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
return vreinterpret_u16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
return vreinterpret_u16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
return vreinterpret_u16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
return vreinterpret_u16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
return vreinterpret_u16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
return vreinterpret_u16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
return vreinterpret_u16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
return vreinterpret_u16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
return vreinterpret_u16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_p16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
return vreinterpret_u16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_u16_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
return vreinterpret_u16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
return vreinterpret_u32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
return vreinterpret_u32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s32(
-// CHECK: ret <2 x i32> %a
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i32> [[A]]
+//
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
return vreinterpret_u32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
return vreinterpret_u32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
return vreinterpret_u32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
return vreinterpret_u32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
return vreinterpret_u32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
return vreinterpret_u32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
return vreinterpret_u32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
return vreinterpret_u32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
return vreinterpret_u32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
return vreinterpret_u32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vreinterpret_u32_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
return vreinterpret_u32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
return vreinterpret_u64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
return vreinterpret_u64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
return vreinterpret_u64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
return vreinterpret_u64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
return vreinterpret_u64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
return vreinterpret_u64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
return vreinterpret_u64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
return vreinterpret_u64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
return vreinterpret_u64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
return vreinterpret_u64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
return vreinterpret_u64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
return vreinterpret_u64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_p64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_u64_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
return vreinterpret_u64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
return vreinterpret_f16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
return vreinterpret_f16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
return vreinterpret_f16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
return vreinterpret_f16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
return vreinterpret_f16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
return vreinterpret_f16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
return vreinterpret_f16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
return vreinterpret_f16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
return vreinterpret_f16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
return vreinterpret_f16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
return vreinterpret_f16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
return vreinterpret_f16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vreinterpret_f16_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
return vreinterpret_f16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
return vreinterpret_f32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
return vreinterpret_f32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
return vreinterpret_f32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
return vreinterpret_f32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
return vreinterpret_f32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
return vreinterpret_f32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
return vreinterpret_f32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
return vreinterpret_f32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
return vreinterpret_f32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
return vreinterpret_f32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
return vreinterpret_f32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
return vreinterpret_f32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vreinterpret_f32_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
return vreinterpret_f32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
return vreinterpret_f64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
return vreinterpret_f64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
return vreinterpret_f64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP0]]
+//
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
return vreinterpret_f64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
return vreinterpret_f64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
return vreinterpret_f64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
return vreinterpret_f64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP0]]
+//
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
return vreinterpret_f64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
return vreinterpret_f64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
return vreinterpret_f64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
return vreinterpret_f64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP1]]
+//
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
return vreinterpret_f64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f64_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
-// CHECK: ret <1 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vreinterpret_f64_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP0]]
+//
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
return vreinterpret_f64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
return vreinterpret_p8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
return vreinterpret_p8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
return vreinterpret_p8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
return vreinterpret_p8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
return vreinterpret_p8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
return vreinterpret_p8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
return vreinterpret_p8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
return vreinterpret_p8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
return vreinterpret_p8_f16(a);
}
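
When the source vector is floating-point, the regenerated checks expect two bitcasts rather than one: first to the integer vector of the same lane shape, then to the destination (here <4 x half> -> <4 x i16> -> <8 x i8>). A minimal sketch of a call that produces this shape, assuming an aarch64 target and <arm_neon.h>; the function name is illustrative only:

#include <arm_neon.h>
// Floating-point source: lowered via the same-shape integer vector, so the
// checks match two chained bitcasts instead of a single one.
poly8x8_t from_fp(float16x4_t v) { return vreinterpret_p8_f16(v); }
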
-// CHECK-LABEL: @test_vreinterpret_p8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
return vreinterpret_p8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
return vreinterpret_p8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
return vreinterpret_p8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_p8_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
return vreinterpret_p8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
return vreinterpret_p16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
return vreinterpret_p16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
return vreinterpret_p16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
return vreinterpret_p16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
return vreinterpret_p16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
return vreinterpret_p16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
return vreinterpret_p16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
return vreinterpret_p16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
return vreinterpret_p16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
return vreinterpret_p16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
return vreinterpret_p16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
return vreinterpret_p16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vreinterpret_p16_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
return vreinterpret_p16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
return vreinterpret_p64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
return vreinterpret_p64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
return vreinterpret_p64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_s64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
return vreinterpret_p64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
return vreinterpret_p64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
return vreinterpret_p64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
return vreinterpret_p64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_u64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
return vreinterpret_p64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
return vreinterpret_p64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
return vreinterpret_p64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
return vreinterpret_p64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
return vreinterpret_p64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vreinterpret_p64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
return vreinterpret_p64_p16(a);
}
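
The q-form (128-bit) variants start below. None of their destinations is a one-element vector, so the scalar-i64 detour seen in the float64x1_t/poly64x1_t cases above does not appear; per the checks that follow, the expected IR is a plain vector bitcast, with one extra intermediate bitcast for some floating-point sources. A minimal sketch, assuming an aarch64 target and <arm_neon.h>; the function name is illustrative only:

#include <arm_neon.h>
// 128-bit reinterpret: both sides keep at least two lanes, so a single
// vector bitcast (no insertelement) is all the checks expect.
int8x16_t q_reinterp(int16x8_t v) { return vreinterpretq_s8_s16(v); }
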
-// CHECK-LABEL: @test_vreinterpretq_s8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
return vreinterpretq_s8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
return vreinterpretq_s8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
return vreinterpretq_s8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
return vreinterpretq_s8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
return vreinterpretq_s8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
return vreinterpretq_s8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
return vreinterpretq_s8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
return vreinterpretq_s8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
return vreinterpretq_s8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
return vreinterpretq_s8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_p8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
return vreinterpretq_s8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
return vreinterpretq_s8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_s8_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
return vreinterpretq_s8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
return vreinterpretq_s16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
return vreinterpretq_s16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
return vreinterpretq_s16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
return vreinterpretq_s16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
return vreinterpretq_s16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
return vreinterpretq_s16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
return vreinterpretq_s16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
return vreinterpretq_s16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
return vreinterpretq_s16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
return vreinterpretq_s16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
return vreinterpretq_s16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_p16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
return vreinterpretq_s16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_s16_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
return vreinterpretq_s16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
return vreinterpretq_s32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
return vreinterpretq_s32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
return vreinterpretq_s32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
return vreinterpretq_s32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
return vreinterpretq_s32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u32(
-// CHECK: ret <4 x i32> %a
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i32> [[A]]
+//
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
return vreinterpretq_s32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
return vreinterpretq_s32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
return vreinterpretq_s32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
return vreinterpretq_s32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
return vreinterpretq_s32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
return vreinterpretq_s32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
return vreinterpretq_s32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_s32_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
return vreinterpretq_s32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
return vreinterpretq_s64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
return vreinterpretq_s64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
return vreinterpretq_s64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
return vreinterpretq_s64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
return vreinterpretq_s64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
return vreinterpretq_s64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
return vreinterpretq_s64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
return vreinterpretq_s64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
return vreinterpretq_s64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
return vreinterpretq_s64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
return vreinterpretq_s64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
return vreinterpretq_s64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_p64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_s64_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
return vreinterpretq_s64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
return vreinterpretq_u8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
return vreinterpretq_u8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
return vreinterpretq_u8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
return vreinterpretq_u8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
return vreinterpretq_u8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
return vreinterpretq_u8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
return vreinterpretq_u8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
return vreinterpretq_u8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
return vreinterpretq_u8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
return vreinterpretq_u8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_p8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
return vreinterpretq_u8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
return vreinterpretq_u8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_u8_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
return vreinterpretq_u8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
return vreinterpretq_u16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
return vreinterpretq_u16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
return vreinterpretq_u16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
return vreinterpretq_u16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
return vreinterpretq_u16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
return vreinterpretq_u16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
return vreinterpretq_u16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
return vreinterpretq_u16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
return vreinterpretq_u16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
return vreinterpretq_u16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
return vreinterpretq_u16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_p16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
return vreinterpretq_u16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_u16_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
return vreinterpretq_u16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
return vreinterpretq_u32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
return vreinterpretq_u32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s32(
-// CHECK: ret <4 x i32> %a
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i32> [[A]]
+//
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
return vreinterpretq_u32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
return vreinterpretq_u32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
return vreinterpretq_u32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
return vreinterpretq_u32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
return vreinterpretq_u32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
return vreinterpretq_u32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
return vreinterpretq_u32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
return vreinterpretq_u32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
return vreinterpretq_u32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
return vreinterpretq_u32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vreinterpretq_u32_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
return vreinterpretq_u32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
return vreinterpretq_u64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
return vreinterpretq_u64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
return vreinterpretq_u64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
return vreinterpretq_u64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
return vreinterpretq_u64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
return vreinterpretq_u64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
return vreinterpretq_u64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
return vreinterpretq_u64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
return vreinterpretq_u64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
return vreinterpretq_u64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
return vreinterpretq_u64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
return vreinterpretq_u64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_p64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_u64_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
return vreinterpretq_u64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
return vreinterpretq_f16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
return vreinterpretq_f16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
return vreinterpretq_f16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
return vreinterpretq_f16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
return vreinterpretq_f16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
return vreinterpretq_f16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
return vreinterpretq_f16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
return vreinterpretq_f16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
return vreinterpretq_f16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
return vreinterpretq_f16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
return vreinterpretq_f16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
return vreinterpretq_f16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vreinterpretq_f16_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
return vreinterpretq_f16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
return vreinterpretq_f32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
return vreinterpretq_f32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
return vreinterpretq_f32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
return vreinterpretq_f32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
return vreinterpretq_f32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
return vreinterpretq_f32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
return vreinterpretq_f32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
return vreinterpretq_f32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
return vreinterpretq_f32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
return vreinterpretq_f32_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
return vreinterpretq_f32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
return vreinterpretq_f32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vreinterpretq_f32_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
return vreinterpretq_f32_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
return vreinterpretq_f64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
return vreinterpretq_f64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
return vreinterpretq_f64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
return vreinterpretq_f64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
return vreinterpretq_f64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
return vreinterpretq_f64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
return vreinterpretq_f64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
return vreinterpretq_f64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
return vreinterpretq_f64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
return vreinterpretq_f64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
return vreinterpretq_f64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP1]]
+//
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
return vreinterpretq_f64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f64_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
-// CHECK: ret <2 x double> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vreinterpretq_f64_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP0]]
+//
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
return vreinterpretq_f64_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
return vreinterpretq_p8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
return vreinterpretq_p8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
return vreinterpretq_p8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
return vreinterpretq_p8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
return vreinterpretq_p8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
return vreinterpretq_p8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
return vreinterpretq_p8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
return vreinterpretq_p8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
return vreinterpretq_p8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
return vreinterpretq_p8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
return vreinterpretq_p8_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
return vreinterpretq_p8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vreinterpretq_p8_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
return vreinterpretq_p8_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
return vreinterpretq_p16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
return vreinterpretq_p16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
return vreinterpretq_p16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
return vreinterpretq_p16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
return vreinterpretq_p16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
return vreinterpretq_p16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
return vreinterpretq_p16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
return vreinterpretq_p16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
return vreinterpretq_p16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
return vreinterpretq_p16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
return vreinterpretq_p16_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
return vreinterpretq_p16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vreinterpretq_p16_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
return vreinterpretq_p16_p64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
return vreinterpretq_p64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
return vreinterpretq_p64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
return vreinterpretq_p64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_s64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
return vreinterpretq_p64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
return vreinterpretq_p64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
return vreinterpretq_p64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
return vreinterpretq_p64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_u64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
return vreinterpretq_p64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
return vreinterpretq_p64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
return vreinterpretq_p64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
return vreinterpretq_p64_f64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
return vreinterpretq_p64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vreinterpretq_p64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
return vreinterpretq_p64_p16(a);
}
-// CHECK-LABEL: @test_vabds_f32(
-// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
-// CHECK: ret float [[VABDS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vabds_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float [[A]], float [[B]])
+// CHECK-NEXT: ret float [[VABDS_F32_I]]
+//
float32_t test_vabds_f32(float32_t a, float32_t b) {
return vabds_f32(a, b);
}
-// CHECK-LABEL: @test_vabdd_f64(
-// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
-// CHECK: ret double [[VABDD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vabdd_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double [[A]], double [[B]])
+// CHECK-NEXT: ret double [[VABDD_F64_I]]
+//
float64_t test_vabdd_f64(float64_t a, float64_t b) {
return vabdd_f64(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s8(
-// CHECK: entry:
-// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK-NEXT: ret <16 x i8> [[V]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuqaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VUQADD_I]]
+//
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
return vuqaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s32(
-// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK-NEXT: ret <4 x i32> [[V]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuqaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> [[VUQADD_I]], <4 x i32> [[VUQADD1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VUQADD2_I]]
+//
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
return vuqaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s64(
-// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK-NEXT: ret <2 x i64> [[V]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuqaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> [[VUQADD_I]], <2 x i64> [[VUQADD1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VUQADD2_I]]
+//
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
return vuqaddq_s64(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s16(
-// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK-NEXT: ret <8 x i16> [[V]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuqaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> [[VUQADD_I]], <8 x i16> [[VUQADD1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VUQADD2_I]]
+//
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
return vuqaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s8(
-// CHECK: entry:
-// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK-NEXT: ret <8 x i8> [[V]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuqadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VUQADD_I]]
+//
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
return vuqadd_s8(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s32(
-// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK-NEXT: ret <2 x i32> [[V]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuqadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> [[VUQADD_I]], <2 x i32> [[VUQADD1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VUQADD2_I]]
+//
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
return vuqadd_s32(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: ret <1 x i64> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vuqadd_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VUQADD2_I]]
+//
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
return vuqadd_s64(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s16(
-// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK-NEXT: ret <4 x i16> [[V]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuqadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[VUQADD_I]], <4 x i16> [[VUQADD1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VUQADD2_I]]
+//
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
return vuqadd_s16(a, b);
}
-// CHECK-LABEL: @test_vsqadd_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: ret <1 x i64> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsqadd_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VSQADD2_I]]
+//
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
return vsqadd_u64(a, b);
}
-// CHECK-LABEL: @test_vsqadd_u8(
-// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VSQADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vsqadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VSQADD_I]]
+//
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
return vsqadd_u8(a, b);
}
-// CHECK-LABEL: @test_vsqaddq_u8(
-// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VSQADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vsqaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VSQADD_I]]
+//
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
return vsqaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vsqadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vsqadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VSQADD2_I]]
+//
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
return vsqadd_u16(a, b);
}
-// CHECK-LABEL: @test_vsqaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vsqaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VSQADD2_I]]
+//
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
return vsqaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vsqadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsqadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VSQADD2_I]]
+//
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
return vsqadd_u32(a, b);
}
-// CHECK-LABEL: @test_vsqaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsqaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VSQADD2_I]]
+//
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
return vsqaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vsqaddq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: ret <2 x i64> [[VSQADD2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsqaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VSQADD2_I]]
+//
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
return vsqaddq_u64(a, b);
}
-// CHECK-LABEL: @test_vabs_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
-// CHECK: ret <1 x i64> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vabs_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]])
+// CHECK-NEXT: ret <1 x i64> [[VABS1_I]]
+//
int64x1_t test_vabs_s64(int64x1_t a) {
return vabs_s64(a);
}
-// CHECK-LABEL: @test_vqabs_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
-// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQABS_V1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqabs_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]])
+// CHECK-NEXT: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqabs_s64(int64x1_t a) {
return vqabs_s64(a);
}
-// CHECK-LABEL: @test_vqneg_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
-// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQNEG_V1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vqneg_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]])
+// CHECK-NEXT: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqneg_s64(int64x1_t a) {
return vqneg_s64(a);
}
-// CHECK-LABEL: @test_vneg_s64(
-// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
-// CHECK: ret <1 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vneg_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <1 x i64> [[SUB_I]]
+//
int64x1_t test_vneg_s64(int64x1_t a) {
return vneg_s64(a);
}
-// CHECK-LABEL: @test_vaddv_f32(
-// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VADDV_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vaddv_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VADDV_F32_I]]
+//
float32_t test_vaddv_f32(float32x2_t a) {
return vaddv_f32(a);
}
-// CHECK-LABEL: @test_vaddvq_f32(
-// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
-// CHECK: ret float [[VADDVQ_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vaddvq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: ret float [[VADDVQ_F32_I]]
+//
float32_t test_vaddvq_f32(float32x4_t a) {
return vaddvq_f32(a);
}
-// CHECK-LABEL: @test_vaddvq_f64(
-// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VADDVQ_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vaddvq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VADDVQ_F64_I]]
+//
float64_t test_vaddvq_f64(float64x2_t a) {
return vaddvq_f64(a);
}
-// CHECK-LABEL: @test_vmaxv_f32(
-// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VMAXV_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vmaxv_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VMAXV_F32_I]]
+//
float32_t test_vmaxv_f32(float32x2_t a) {
return vmaxv_f32(a);
}
-// CHECK-LABEL: @test_vmaxvq_f64(
-// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VMAXVQ_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vmaxvq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VMAXVQ_F64_I]]
+//
float64_t test_vmaxvq_f64(float64x2_t a) {
return vmaxvq_f64(a);
}
-// CHECK-LABEL: @test_vminv_f32(
-// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VMINV_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vminv_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VMINV_F32_I]]
+//
float32_t test_vminv_f32(float32x2_t a) {
return vminv_f32(a);
}
-// CHECK-LABEL: @test_vminvq_f64(
-// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VMINVQ_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vminvq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VMINVQ_F64_I]]
+//
float64_t test_vminvq_f64(float64x2_t a) {
return vminvq_f64(a);
}
-// CHECK-LABEL: @test_vmaxnmvq_f64(
-// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VMAXNMVQ_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vmaxnmvq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VMAXNMVQ_F64_I]]
+//
float64_t test_vmaxnmvq_f64(float64x2_t a) {
return vmaxnmvq_f64(a);
}
-// CHECK-LABEL: @test_vmaxnmv_f32(
-// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VMAXNMV_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vmaxnmv_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VMAXNMV_F32_I]]
+//
float32_t test_vmaxnmv_f32(float32x2_t a) {
return vmaxnmv_f32(a);
}
-// CHECK-LABEL: @test_vminnmvq_f64(
-// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
-// CHECK: ret double [[VMINNMVQ_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vminnmvq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
+// CHECK-NEXT: ret double [[VMINNMVQ_F64_I]]
+//
float64_t test_vminnmvq_f64(float64x2_t a) {
return vminnmvq_f64(a);
}
-// CHECK-LABEL: @test_vminnmv_f32(
-// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
-// CHECK: ret float [[VMINNMV_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vminnmv_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: ret float [[VMINNMV_F32_I]]
+//
float32_t test_vminnmv_f32(float32x2_t a) {
return vminnmv_f32(a);
}
-// CHECK-LABEL: @test_vpaddq_s64(
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
return vpaddq_s64(a, b);
}
-// CHECK-LABEL: @test_vpaddq_u64(
-// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]])
+// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
return vpaddq_u64(a, b);
}
-// CHECK-LABEL: @test_vpaddd_u64(
-// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
-// CHECK: ret i64 [[VPADDD_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vpaddd_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT: ret i64 [[VPADDD_U64_I]]
+//
uint64_t test_vpaddd_u64(uint64x2_t a) {
return vpaddd_u64(a);
}
-// CHECK-LABEL: @test_vaddvq_s64(
-// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
-// CHECK: ret i64 [[VADDVQ_S64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddvq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT: ret i64 [[VADDVQ_S64_I]]
+//
int64_t test_vaddvq_s64(int64x2_t a) {
return vaddvq_s64(a);
}
-// CHECK-LABEL: @test_vaddvq_u64(
-// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
-// CHECK: ret i64 [[VADDVQ_U64_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddvq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT: ret i64 [[VADDVQ_U64_I]]
+//
uint64_t test_vaddvq_u64(uint64x2_t a) {
return vaddvq_u64(a);
}
-// CHECK-LABEL: @test_vadd_f64(
-// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
-// CHECK: ret <1 x double> [[ADD_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vadd_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <1 x double> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x double> [[ADD_I]]
+//
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
return vadd_f64(a, b);
}
-// CHECK-LABEL: @test_vmul_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
-// CHECK: ret <1 x double> [[MUL_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmul_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x double> [[MUL_I]]
+//
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
return vmul_f64(a, b);
}
-// CHECK-LABEL: @test_vdiv_f64(
-// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
-// CHECK: ret <1 x double> [[DIV_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vdiv_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <1 x double> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x double> [[DIV_I]]
+//
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
return vdiv_f64(a, b);
}
-// CHECK-LABEL: @test_vmla_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
-// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
-// CHECK: ret <1 x double> [[ADD_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmla_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <1 x double> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <1 x double> [[ADD_I]]
+//
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmla_f64(a, b, c);
}
-// CHECK-LABEL: @test_vmls_f64(
-// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
-// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
-// CHECK: ret <1 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmls_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <1 x double> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <1 x double> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <1 x double> [[SUB_I]]
+//
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmls_f64(a, b, c);
}
-// CHECK-LABEL: @test_vfma_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
-// CHECK: ret <1 x double> [[TMP3]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfma_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// CHECK-NEXT: [[__P2_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// CHECK-NEXT: ret <1 x double> [[TMP9]]
+//
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfma_f64(a, b, c);
}
-// CHECK-LABEL: @test_vfms_f64(
-// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %b
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
-// CHECK: ret <1 x double> [[TMP3]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfms_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[B]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[C]] to i64
+// CHECK-NEXT: [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__P2_ADDR_I_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], <1 x double> [[TMP6]])
+// CHECK-NEXT: ret <1 x double> [[TMP9]]
+//
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfms_f64(a, b, c);
}
-// CHECK-LABEL: @test_vsub_f64(
-// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
-// CHECK: ret <1 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vsub_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <1 x double> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x double> [[SUB_I]]
+//
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
return vsub_f64(a, b);
}
-// CHECK-LABEL: @test_vabd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VABD2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vabd_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]])
+// CHECK-NEXT: ret <1 x double> [[VABD2_I]]
+//
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
return vabd_f64(a, b);
}
-// CHECK-LABEL: @test_vmax_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VMAX2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmax_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]])
+// CHECK-NEXT: ret <1 x double> [[VMAX2_I]]
+//
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
return vmax_f64(a, b);
}
-// CHECK-LABEL: @test_vmin_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VMIN2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmin_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]])
+// CHECK-NEXT: ret <1 x double> [[VMIN2_I]]
+//
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
return vmin_f64(a, b);
}
-// CHECK-LABEL: @test_vmaxnm_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VMAXNM2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmaxnm_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]])
+// CHECK-NEXT: ret <1 x double> [[VMAXNM2_I]]
+//
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
return vmaxnm_f64(a, b);
}
-// CHECK-LABEL: @test_vminnm_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VMINNM2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vminnm_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]])
+// CHECK-NEXT: ret <1 x double> [[VMINNM2_I]]
+//
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
return vminnm_f64(a, b);
}
-// CHECK-LABEL: @test_vabs_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vabs_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]])
+// CHECK-NEXT: ret <1 x double> [[VABS1_I]]
+//
float64x1_t test_vabs_f64(float64x1_t a) {
return vabs_f64(a);
}
-// CHECK-LABEL: @test_vneg_f64(
-// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %a
-// CHECK: ret <1 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vneg_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[A]]
+// CHECK-NEXT: ret <1 x double> [[FNEG_I]]
+//
float64x1_t test_vneg_f64(float64x1_t a) {
return vneg_f64(a);
}
-// CHECK-LABEL: @test_vcvt_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> [[VCVTZ_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
return vcvt_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvt_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> [[VCVTZ_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTZ1_I]]
+//
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
return vcvt_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtn_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtn_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTN1_I]]
+//
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
return vcvtn_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtn_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtn_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTN1_I]]
+//
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
return vcvtn_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtp_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtp_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTP1_I]]
+//
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
return vcvtp_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtp_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtp_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTP1_I]]
+//
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
return vcvtp_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtm_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtm_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTM1_I]]
+//
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
return vcvtm_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtm_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtm_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTM1_I]]
+//
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
return vcvtm_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvta_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvta_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTA1_I]]
+//
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
return vcvta_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvta_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
-// CHECK: ret <1 x i64> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvta_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]])
+// CHECK-NEXT: ret <1 x i64> [[VCVTA1_I]]
+//
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
return vcvta_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvt_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
-// CHECK: ret <1 x double> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vcvt_f64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[VCVT_I]]
+//
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
return vcvt_f64_s64(a);
}
-// CHECK-LABEL: @test_vcvt_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
-// CHECK: ret <1 x double> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vcvt_f64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[VCVT_I]]
+//
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
return vcvt_f64_u64(a);
}
-// CHECK-LABEL: @test_vcvt_n_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
-// CHECK: ret <1 x i64> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_n_s64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
+// CHECK-NEXT: ret <1 x i64> [[VCVT_N1]]
+//
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
return vcvt_n_s64_f64(a, 64);
}
-// CHECK-LABEL: @test_vcvt_n_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
-// CHECK: ret <1 x i64> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_n_u64_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
+// CHECK-NEXT: ret <1 x i64> [[VCVT_N1]]
+//
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
return vcvt_n_u64_f64(a, 64);
}
-// CHECK-LABEL: @test_vcvt_n_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
-// CHECK: ret <1 x double> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vcvt_n_f64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
+// CHECK-NEXT: ret <1 x double> [[VCVT_N1]]
+//
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
return vcvt_n_f64_s64(a, 64);
}
-// CHECK-LABEL: @test_vcvt_n_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
-// CHECK: ret <1 x double> [[VCVT_N1]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vcvt_n_f64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
+// CHECK-NEXT: ret <1 x double> [[VCVT_N1]]
+//
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
return vcvt_n_f64_u64(a, 64);
}
-// CHECK-LABEL: @test_vrndn_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDN1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrndn_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> [[VRNDN_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDN1_I]]
+//
float64x1_t test_vrndn_f64(float64x1_t a) {
return vrndn_f64(a);
}
-// CHECK-LABEL: @test_vrnda_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDA1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnda_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDA1_I]]
+//
float64x1_t test_vrnda_f64(float64x1_t a) {
return vrnda_f64(a);
}
-// CHECK-LABEL: @test_vrndp_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDP1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrndp_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDP1_I]]
+//
float64x1_t test_vrndp_f64(float64x1_t a) {
return vrndp_f64(a);
}
-// CHECK-LABEL: @test_vrndm_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDM1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrndm_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDM1_I]]
+//
float64x1_t test_vrndm_f64(float64x1_t a) {
return vrndm_f64(a);
}
-// CHECK-LABEL: @test_vrndx_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDX1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrndx_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDX1_I]]
+//
float64x1_t test_vrndx_f64(float64x1_t a) {
return vrndx_f64(a);
}
-// CHECK-LABEL: @test_vrnd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDZ1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnd_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDZ1_I]]
+//
float64x1_t test_vrnd_f64(float64x1_t a) {
return vrnd_f64(a);
}
-// CHECK-LABEL: @test_vrndi_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRNDI1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrndi_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRNDI_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRNDI_V1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_V_I]])
+// CHECK-NEXT: ret <1 x double> [[VRNDI_V1_I]]
+//
float64x1_t test_vrndi_f64(float64x1_t a) {
return vrndi_f64(a);
}
-// CHECK-LABEL: @test_vrsqrte_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrsqrte_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]])
+// CHECK-NEXT: ret <1 x double> [[VRSQRTE_V1_I]]
+//
float64x1_t test_vrsqrte_f64(float64x1_t a) {
return vrsqrte_f64(a);
}
-// CHECK-LABEL: @test_vrecpe_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VRECPE_V1_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrecpe_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]])
+// CHECK-NEXT: ret <1 x double> [[VRECPE_V1_I]]
+//
float64x1_t test_vrecpe_f64(float64x1_t a) {
return vrecpe_f64(a);
}
-// CHECK-LABEL: @test_vsqrt_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[VSQRT_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vsqrt_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP2]])
+// CHECK-NEXT: ret <1 x double> [[VSQRT_I]]
+//
float64x1_t test_vsqrt_f64(float64x1_t a) {
return vsqrt_f64(a);
}
-// CHECK-LABEL: @test_vrecps_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: ret <1 x double> [[VRECPS_V2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrecps_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]])
+// CHECK-NEXT: [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP4]], i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP5]]
+//
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
return vrecps_f64(a, b);
}
-// CHECK-LABEL: @test_vrsqrts_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
-// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
-// CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrsqrts_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__P1_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__P1_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]])
+// CHECK-NEXT: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP4]], i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP5]]
+//
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
return vrsqrts_f64(a, b);
}
-// CHECK-LABEL: @test_vminv_s32(
-// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VMINV_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vminv_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VMINV_S32_I]]
+//
int32_t test_vminv_s32(int32x2_t a) {
return vminv_s32(a);
}
-// CHECK-LABEL: @test_vminv_u32(
-// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VMINV_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vminv_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VMINV_U32_I]]
+//
uint32_t test_vminv_u32(uint32x2_t a) {
return vminv_u32(a);
}
-// CHECK-LABEL: @test_vmaxv_s32(
-// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VMAXV_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vmaxv_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VMAXV_S32_I]]
+//
int32_t test_vmaxv_s32(int32x2_t a) {
return vmaxv_s32(a);
}
-// CHECK-LABEL: @test_vmaxv_u32(
-// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VMAXV_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vmaxv_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VMAXV_U32_I]]
+//
uint32_t test_vmaxv_u32(uint32x2_t a) {
return vmaxv_u32(a);
}
-// CHECK-LABEL: @test_vaddv_s32(
-// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VADDV_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vaddv_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VADDV_S32_I]]
+//
int32_t test_vaddv_s32(int32x2_t a) {
return vaddv_s32(a);
}
-// CHECK-LABEL: @test_vaddv_u32(
-// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
-// CHECK: ret i32 [[VADDV_U32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vaddv_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i32 [[VADDV_U32_I]]
+//
uint32_t test_vaddv_u32(uint32x2_t a) {
return vaddv_u32(a);
}
-// CHECK-LABEL: @test_vaddlv_s32(
-// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
-// CHECK: ret i64 [[VADDLV_S32_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddlv_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i64 [[VADDLV_S32_I]]
+//
int64_t test_vaddlv_s32(int32x2_t a) {
return vaddlv_s32(a);
}
-// CHECK-LABEL: @test_vaddlv_u32(
-// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
-// CHECK: ret i64 [[VADDLV_U32_I]]
+// CHECK-LABEL: define dso_local i64 @test_vaddlv_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT: ret i64 [[VADDLV_U32_I]]
+//
uint64_t test_vaddlv_u32(uint32x2_t a) {
return vaddlv_u32(a);
}
diff --git a/clang/test/CodeGen/AArch64/neon-ldst-one-rcpc3.c b/clang/test/CodeGen/AArch64/neon-ldst-one-rcpc3.c
index 40c5a0a598d68..29bfaabbe83cd 100644
--- a/clang/test/CodeGen/AArch64/neon-ldst-one-rcpc3.c
+++ b/clang/test/CodeGen/AArch64/neon-ldst-one-rcpc3.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
// RUN: -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
@@ -34,10 +34,11 @@ int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) {
// CHECK-LABEL: @test_vldap1q_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
-// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
+// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP3]], i32 1
// CHECK-NEXT: ret <2 x double> [[VLDAP1_LANE]]
//
float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) {
@@ -82,10 +83,12 @@ int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) {
// CHECK-LABEL: @test_vldap1_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
-// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
+// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP2]], double [[TMP3]], i32 0
// CHECK-NEXT: ret <1 x double> [[VLDAP1_LANE]]
//
float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) {
@@ -130,10 +133,11 @@ void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) {
// CHECK-LABEL: @test_vstl1q_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
+// CHECK-NEXT: store atomic double [[TMP3]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) {
@@ -178,10 +182,12 @@ void test_vstl1_lane_s64(int64_t *a, int64x1_t b) {
// CHECK-LABEL: @test_vstl1_lane_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x double> [[TMP2]], i32 0
+// CHECK-NEXT: store atomic double [[TMP3]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_f64(float64_t *a, float64x1_t b) {
diff --git a/clang/test/CodeGen/AArch64/neon-ldst-one.c b/clang/test/CodeGen/AArch64/neon-ldst-one.c
index b57df40d8e5c9..2cff007826ba6 100644
--- a/clang/test/CodeGen/AArch64/neon-ldst-one.c
+++ b/clang/test/CodeGen/AArch64/neon-ldst-one.c
@@ -1,5757 +1,4962 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_u8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
uint8x16_t test_vld1q_dup_u8(uint8_t *a) {
return vld1q_dup_u8(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_u16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_dup_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
uint16x8_t test_vld1q_dup_u16(uint16_t *a) {
return vld1q_dup_u16(a);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_dup_u32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_dup_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
uint32x4_t test_vld1q_dup_u32(uint32_t *a) {
return vld1q_dup_u32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_u64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
uint64x2_t test_vld1q_dup_u64(uint64_t *a) {
return vld1q_dup_u64(a);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_s8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
int8x16_t test_vld1q_dup_s8(int8_t *a) {
return vld1q_dup_s8(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_s16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_dup_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
int16x8_t test_vld1q_dup_s16(int16_t *a) {
return vld1q_dup_s16(a);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_dup_s32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_dup_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
int32x4_t test_vld1q_dup_s32(int32_t *a) {
return vld1q_dup_s32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_s64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
int64x2_t test_vld1q_dup_s64(int64_t *a) {
return vld1q_dup_s64(a);
}
-// CHECK-LABEL: define{{.*}} <8 x half> @test_vld1q_dup_f16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load half, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <8 x half> poison, half [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x half> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vld1q_dup_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x half> [[LANE]]
+//
float16x8_t test_vld1q_dup_f16(float16_t *a) {
return vld1q_dup_f16(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vld1q_dup_f32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load float, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x float> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vld1q_dup_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x float> [[LANE]]
+//
float32x4_t test_vld1q_dup_f32(float32_t *a) {
return vld1q_dup_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vld1q_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load double, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x double> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vld1q_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x double> [[LANE]]
+//
float64x2_t test_vld1q_dup_f64(float64_t *a) {
return vld1q_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_p8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
poly8x16_t test_vld1q_dup_p8(poly8_t *a) {
return vld1q_dup_p8(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_p16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_dup_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
poly16x8_t test_vld1q_dup_p16(poly16_t *a) {
return vld1q_dup_p16(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
poly64x2_t test_vld1q_dup_p64(poly64_t *a) {
return vld1q_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_u8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
uint8x8_t test_vld1_dup_u8(uint8_t *a) {
return vld1_dup_u8(a);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_u16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_dup_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
uint16x4_t test_vld1_dup_u16(uint16_t *a) {
return vld1_dup_u16(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_dup_u32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_dup_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
uint32x2_t test_vld1_dup_u32(uint32_t *a) {
return vld1_dup_u32(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_u64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
uint64x1_t test_vld1_dup_u64(uint64_t *a) {
return vld1_dup_u64(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_s8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
int8x8_t test_vld1_dup_s8(int8_t *a) {
return vld1_dup_s8(a);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_s16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_dup_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
int16x4_t test_vld1_dup_s16(int16_t *a) {
return vld1_dup_s16(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_dup_s32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_dup_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
int32x2_t test_vld1_dup_s32(int32_t *a) {
return vld1_dup_s32(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_s64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
int64x1_t test_vld1_dup_s64(int64_t *a) {
return vld1_dup_s64(a);
}
-// CHECK-LABEL: define{{.*}} <4 x half> @test_vld1_dup_f16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load half, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x half> poison, half [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x half> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vld1_dup_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x half> [[LANE]]
+//
float16x4_t test_vld1_dup_f16(float16_t *a) {
return vld1_dup_f16(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vld1_dup_f32(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load float, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x float> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vld1_dup_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x float> [[LANE]]
+//
float32x2_t test_vld1_dup_f32(float32_t *a) {
return vld1_dup_f32(a);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vld1_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load double, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x double> [[LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vld1_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x double> [[LANE]]
+//
float64x1_t test_vld1_dup_f64(float64_t *a) {
return vld1_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_p8(ptr noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
poly8x8_t test_vld1_dup_p8(poly8_t *a) {
return vld1_dup_p8(a);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_p16(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_dup_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
poly16x4_t test_vld1_dup_p16(poly16_t *a) {
return vld1_dup_p16(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a
-// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
poly64x1_t test_vld1_dup_p64(poly64_t *a) {
return vld1_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_dup_u64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x2_t @test_vld2q_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint64x2x2_t test_vld2q_dup_u64(uint64_t *a) {
return vld2q_dup_u64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_dup_s64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x2_t @test_vld2q_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int64x2x2_t test_vld2q_dup_s64(int64_t *a) {
return vld2q_dup_s64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x2_t @test_vld2q_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T:%.*]] poison, <2 x double> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x2x2_t test_vld2q_dup_f64(float64_t *a) {
return vld2q_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld2q_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x2x2_t test_vld2q_dup_p64(poly64_t *a) {
return vld2q_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x2_t @test_vld2_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x1x2_t test_vld2_dup_f64(float64_t *a) {
return vld2_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld2_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x1x2_t test_vld2_dup_p64(poly64_t *a) {
return vld2_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_dup_u64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x3_t @test_vld3q_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) {
return vld3q_dup_u64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_dup_s64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x3_t @test_vld3q_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int64x2x3_t test_vld3q_dup_s64(int64_t *a) {
return vld3q_dup_s64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x3_t @test_vld3q_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T:%.*]] poison, <2 x double> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x2x3_t test_vld3q_dup_f64(float64_t *a) {
return vld3q_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld3q_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) {
return vld3q_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x3_t @test_vld3_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x1x3_t test_vld3_dup_f64(float64_t *a) {
return vld3_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld3_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x1x3_t test_vld3_dup_p64(poly64_t *a) {
return vld3_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_dup_u64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x4_t @test_vld4q_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint64x2x4_t test_vld4q_dup_u64(uint64_t *a) {
return vld4q_dup_u64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_dup_s64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.int64x2x4_t @test_vld4q_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int64x2x4_t test_vld4q_dup_s64(int64_t *a) {
return vld4q_dup_s64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x2x4_t @test_vld4q_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T:%.*]] poison, <2 x double> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x double> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x2x4_t test_vld4q_dup_f64(float64_t *a) {
return vld4q_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld4q_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x2x4_t test_vld4q_dup_p64(poly64_t *a) {
return vld4q_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_dup_f64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.float64x1x4_t @test_vld4_dup_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x double> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x1x4_t test_vld4_dup_f64(float64_t *a) {
return vld4_dup_f64(a);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_dup_p64(ptr noundef %a) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld4_dup_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x1x4_t test_vld4_dup_p64(poly64_t *a) {
return vld4_dup_p64(a);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_u8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
uint8x16_t test_vld1q_lane_u8(uint8_t *a, uint8x16_t b) {
return vld1q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_u16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
uint16x8_t test_vld1q_lane_u16(uint16_t *a, uint16x8_t b) {
return vld1q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_lane_u32(ptr noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
-// CHECK: ret <4 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VLD1_LANE]]
+//
uint32x4_t test_vld1q_lane_u32(uint32_t *a, uint32x4_t b) {
return vld1q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_u64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1
-// CHECK: ret <2 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VLD1_LANE]]
+//
uint64x2_t test_vld1q_lane_u64(uint64_t *a, uint64x2_t b) {
return vld1q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_s8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
int8x16_t test_vld1q_lane_s8(int8_t *a, int8x16_t b) {
return vld1q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_s16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
int16x8_t test_vld1q_lane_s16(int16_t *a, int16x8_t b) {
return vld1q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_lane_s32(ptr noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
-// CHECK: ret <4 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vld1q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VLD1_LANE]]
+//
int32x4_t test_vld1q_lane_s32(int32_t *a, int32x4_t b) {
return vld1q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_s64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1
-// CHECK: ret <2 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VLD1_LANE]]
+//
int64x2_t test_vld1q_lane_s64(int64_t *a, int64x2_t b) {
return vld1q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <8 x half> @test_vld1q_lane_f16(ptr noundef %a, <8 x half> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: [[TMP4:%.*]] = load half, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
-// CHECK: ret <8 x half> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vld1q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP3]], i32 7
+// CHECK-NEXT: ret <8 x half> [[VLD1_LANE]]
+//
float16x8_t test_vld1q_lane_f16(float16_t *a, float16x8_t b) {
return vld1q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vld1q_lane_f32(ptr noundef %a, <4 x float> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = load float, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
-// CHECK: ret <4 x float> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vld1q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VLD1_LANE]]
+//
float32x4_t test_vld1q_lane_f32(float32_t *a, float32x4_t b) {
return vld1q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vld1q_lane_f64(ptr noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP4:%.*]] = load double, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP4]], i32 1
-// CHECK: ret <2 x double> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vld1q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP3]], i32 1
+// CHECK-NEXT: ret <2 x double> [[VLD1_LANE]]
+//
float64x2_t test_vld1q_lane_f64(float64_t *a, float64x2_t b) {
return vld1q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_p8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
poly8x16_t test_vld1q_lane_p8(poly8_t *a, poly8x16_t b) {
return vld1q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_p16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vld1q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
poly16x8_t test_vld1q_lane_p16(poly16_t *a, poly16x8_t b) {
return vld1q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_p64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1
-// CHECK: ret <2 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VLD1_LANE]]
+//
poly64x2_t test_vld1q_lane_p64(poly64_t *a, poly64x2_t b) {
return vld1q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_lane_u8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
uint8x8_t test_vld1_lane_u8(uint8_t *a, uint8x8_t b) {
return vld1_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_u16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
uint16x4_t test_vld1_lane_u16(uint16_t *a, uint16x4_t b) {
return vld1_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_lane_u32(ptr noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
-// CHECK: ret <2 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VLD1_LANE]]
+//
uint32x2_t test_vld1_lane_u32(uint32_t *a, uint32x2_t b) {
return vld1_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_lane_u64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
-// CHECK: ret <1 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VLD1_LANE]]
+//
uint64x1_t test_vld1_lane_u64(uint64_t *a, uint64x1_t b) {
return vld1_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_lane_s8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
int8x8_t test_vld1_lane_s8(int8_t *a, int8x8_t b) {
return vld1_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_s16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
int16x4_t test_vld1_lane_s16(int16_t *a, int16x4_t b) {
return vld1_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_lane_s32(ptr noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
-// CHECK: ret <2 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vld1_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VLD1_LANE]]
+//
int32x2_t test_vld1_lane_s32(int32_t *a, int32x2_t b) {
return vld1_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_lane_s64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
-// CHECK: ret <1 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VLD1_LANE]]
+//
int64x1_t test_vld1_lane_s64(int64_t *a, int64x1_t b) {
return vld1_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <4 x half> @test_vld1_lane_f16(ptr noundef %a, <4 x half> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: [[TMP4:%.*]] = load half, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
-// CHECK: ret <4 x half> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vld1_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP3]], i32 3
+// CHECK-NEXT: ret <4 x half> [[VLD1_LANE]]
+//
float16x4_t test_vld1_lane_f16(float16_t *a, float16x4_t b) {
return vld1_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vld1_lane_f32(ptr noundef %a, <2 x float> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = load float, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
-// CHECK: ret <2 x float> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vld1_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i32 1
+// CHECK-NEXT: ret <2 x float> [[VLD1_LANE]]
+//
float32x2_t test_vld1_lane_f32(float32_t *a, float32x2_t b) {
return vld1_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vld1_lane_f64(ptr noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[TMP4:%.*]] = load double, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x double> [[TMP2]], double [[TMP4]], i32 0
-// CHECK: ret <1 x double> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vld1_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x double> [[TMP2]], double [[TMP3]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VLD1_LANE]]
+//
float64x1_t test_vld1_lane_f64(float64_t *a, float64x1_t b) {
return vld1_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_lane_p8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
poly8x8_t test_vld1_lane_p8(poly8_t *a, poly8x8_t b) {
return vld1_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_p16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vld1_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
poly16x4_t test_vld1_lane_p16(poly16_t *a, poly16x4_t b) {
return vld1_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_lane_p64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
-// CHECK: ret <1 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 8
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VLD1_LANE]]
+//
poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) {
return vld1_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x16x2_t @test_vld2q_lane_s8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[SRC]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vld2q_lane_s8(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [2 x <16 x i8>] alignstack(16) [[SRC_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 0
+// CHECK-NEXT: [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[SRC_COERCE_FCA_0_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[PTR]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) {
return vld2q_lane_s8(ptr, src, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x2_t @test_vld2q_lane_u8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[SRC]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vld2q_lane_u8(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [2 x <16 x i8>] alignstack(16) [[SRC_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 0
+// CHECK-NEXT: [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[SRC_COERCE_FCA_0_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[PTR]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) {
return vld2q_lane_u8(ptr, src, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x2_t @test_vld2q_lane_p8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[SRC]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr)
-// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vld2q_lane_p8(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [2 x <16 x i8>] alignstack(16) [[SRC_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 0
+// CHECK-NEXT: [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[SRC_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[SRC_COERCE_FCA_0_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[PTR]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) {
return vld2q_lane_p8(ptr, src, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x16x3_t @test_vld3q_lane_s8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[SRC]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %ptr)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.int8x16x3_t @test_vld3q_lane_s8(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [3 x <16 x i8>] alignstack(16) [[SRC_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 0
+// CHECK-NEXT: [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 1
+// CHECK-NEXT: [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[SRC_COERCE_FCA_0_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_1_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[PTR]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) {
return vld3q_lane_s8(ptr, src, 15);
}
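// The vld3q variants follow the same shape, with a third extractvalue feeding
// llvm.aarch64.neon.ld3lane and a third insertvalue rebuilding the result.
// Sketch, given an initialized int8x16x3_t triple and int8_t buf[3]
// (both hypothetical); three consecutive bytes are read, one per register:
//   triple = vld3q_lane_s8(buf, triple, 15);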
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x3_t @test_vld3q_lane_u8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[SRC]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %ptr)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x3_t @test_vld3q_lane_u8(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [3 x <16 x i8>] alignstack(16) [[SRC_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SRC_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 0
+// CHECK-NEXT: [[SRC_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 1
+// CHECK-NEXT: [[SRC_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[SRC_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[SRC_COERCE_FCA_0_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_1_EXTRACT]], <16 x i8> [[SRC_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[PTR]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) {
return vld3q_lane_u8(ptr, src, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x2_t @test_vld2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vld2q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) {
return vld2q_lane_u16(a, b, 7);
}
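// For element types wider than i8, the regenerated checks still show a
// round-trip through <16 x i8>: each operand is bitcast to bytes and back
// before the lane load. The valid lane range shrinks with the element width,
// e.g. (pointer and input-struct names hypothetical):
//   uint16x8x2_t r = vld2q_lane_u16(p16, r0, 7);   // lanes 0..7
//   uint32x4x2_t s = vld2q_lane_u32(p32, s0, 3);   // lanes 0..3
//   uint64x2x2_t t = vld2q_lane_u64(p64, t0, 1);   // lanes 0..1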
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x2_t @test_vld2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vld2q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) {
return vld2q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x2_t @test_vld2q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) {
return vld2q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int16x8x2_t @test_vld2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vld2q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) {
return vld2q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x4x2_t @test_vld2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vld2q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) {
return vld2q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int64x2x2_t @test_vld2q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) {
return vld2q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x8x2_t @test_vld2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], i64 7, ptr %a)
-// CHECK: store { <8 x half>, <8 x half> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float16x8x2_t @test_vld2q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0(<8 x half> [[TMP4]], <8 x half> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] poison, <8 x half> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) {
return vld2q_lane_f16(a, b, 7);
}
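// The floating-point variants take one extra hop: the <8 x half> operands are
// first bitcast to the same-width integer vector (<8 x i16>) and only then to
// <16 x i8> and back, unlike the u16 hunk above, which goes straight to bytes.
// The f32 and f64 hunks below show the same step via <4 x i32> and <2 x i64>.
// Sketch (names hypothetical): float16x8x2_t h = vld2q_lane_f16(ph, h0, 7);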
-// CHECK-LABEL: define{{.*}} %struct.float32x4x2_t @test_vld2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vld2q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> [[TMP4]], <4 x float> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) {
return vld2q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float64x2x2_t @test_vld2q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> [[TMP4]], <2 x double> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T:%.*]] poison, <2 x double> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) {
return vld2q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x2_t @test_vld2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vld2q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) {
return vld2q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld2q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) {
return vld2q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x2_t @test_vld2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vld2_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) {
return vld2_lane_u8(a, b, 7);
}
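// The 64-bit (D-register) vld2_lane variants are identical in shape but take
// <8 x i8> operands with alignstack(8) rather than alignstack(16), and the i8
// lane index tops out at 7. Sketch (names hypothetical):
//   uint8_t buf[2] = {0};
//   uint8x8x2_t d0 = {{vdup_n_u8(0), vdup_n_u8(0)}};
//   uint8x8x2_t d = vld2_lane_u8(buf, d0, 7);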
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x2_t @test_vld2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vld2_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) {
return vld2_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x2_t @test_vld2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vld2_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) {
return vld2_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x2_t @test_vld2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x2_t @test_vld2_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) {
return vld2_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x8x2_t @test_vld2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vld2_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) {
return vld2_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int16x4x2_t @test_vld2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vld2_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) {
return vld2_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x2x2_t @test_vld2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vld2_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) {
return vld2_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x1x2_t @test_vld2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.int64x1x2_t @test_vld2_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) {
return vld2_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x4x2_t @test_vld2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x half>, <4 x half> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float16x4x2_t @test_vld2_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0(<4 x half> [[TMP4]], <4 x half> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T:%.*]] poison, <4 x half> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) {
return vld2_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float32x2x2_t @test_vld2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], i64 1, ptr %a)
-// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vld2_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> [[TMP4]], <2 x float> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) {
return vld2_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
-// CHECK: [[VLD2_LANE:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], i64 0, ptr %a)
-// CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.float64x1x2_t @test_vld2_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> [[TMP4]], <1 x double> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) {
return vld2_lane_f64(a, b, 0);
}
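// A minimal sketch for the one-element double variant, not part of the patch
// (demo_vld2_lane_f64 is a hypothetical name). A float64x1_t has a single
// lane, so 0 is the only valid index; the call reads two consecutive doubles
// from p, one into each vector of the pair.
//
// #include <arm_neon.h>
// float64x1x2_t demo_vld2_lane_f64(const float64_t *p, float64x1x2_t src) {
//   return vld2_lane_f64(p, src, 0); // lane 0 of each <1 x double> vector
// }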
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x2_t @test_vld2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vld2_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) {
return vld2_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x2_t @test_vld2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vld2_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) {
return vld2_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x2_t [[TMP13]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld2_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) {
return vld2_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x3_t @test_vld3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x3_t @test_vld3q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) {
return vld3q_lane_u16(a, b, 7);
}
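// A minimal sketch of the 128-bit three-register form, not part of the patch
// (demo_vld3q_lane_u16 is a hypothetical name): vld3q_lane_u16 loads one
// three-element structure from p and de-interleaves it into lane 7 of each
// of the three q-register vectors, preserving the remaining lanes of `src`.
//
// #include <arm_neon.h>
// uint16x8x3_t demo_vld3q_lane_u16(const uint16_t *p, uint16x8x3_t src) {
//   return vld3q_lane_u16(p, src, 7); // constant lane index in [0, 7]
// }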
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x3_t @test_vld3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x3_t @test_vld3q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T:%.*]] poison, <4 x i32> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
return vld3q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x3_t @test_vld3q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) {
return vld3q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int16x8x3_t @test_vld3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int16x8x3_t @test_vld3q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) {
return vld3q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x4x3_t @test_vld3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int32x4x3_t @test_vld3q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T:%.*]] poison, <4 x i32> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) {
return vld3q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int64x2x3_t @test_vld3q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) {
return vld3q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x8x3_t @test_vld3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0(<8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i64 7, ptr %a)
-// CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float16x8x3_t @test_vld3q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0(<8 x half> [[TMP6]], <8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T:%.*]] poison, <8 x half> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x half> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) {
return vld3q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.float32x4x3_t @test_vld3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float32x4x3_t @test_vld3q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> [[TMP6]], <4 x float> [[TMP7]], <4 x float> [[TMP8]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T:%.*]] poison, <4 x float> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x float> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) {
return vld3q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float64x2x3_t @test_vld3q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x double> [[TMP8]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T:%.*]] poison, <2 x double> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) {
return vld3q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x3_t @test_vld3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x3_t @test_vld3q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) {
return vld3q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x3_t @test_vld3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x3_t @test_vld3q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T:%.*]] poison, <8 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) {
return vld3q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld3q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) {
return vld3q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x3_t @test_vld3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x3_t @test_vld3_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) {
return vld3_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x3_t @test_vld3_lane_u16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x3_t @test_vld3_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) {
return vld3_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x3_t @test_vld3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x3_t @test_vld3_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T:%.*]] poison, <2 x i32> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) {
return vld3_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x3_t @test_vld3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x3_t @test_vld3_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) {
return vld3_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x8x3_t @test_vld3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.int8x8x3_t @test_vld3_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) {
return vld3_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int16x4x3_t @test_vld3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int16x4x3_t @test_vld3_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) {
return vld3_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x2x3_t @test_vld3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int32x2x3_t @test_vld3_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T:%.*]] poison, <2 x i32> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) {
return vld3_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x1x3_t @test_vld3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.int64x1x3_t @test_vld3_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_INT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) {
return vld3_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x4x3_t @test_vld3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0(<4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float16x4x3_t @test_vld3_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0(<4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T:%.*]] poison, <4 x half> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x half> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) {
return vld3_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float32x2x3_t @test_vld3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i64 1, ptr %a)
-// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float32x2x3_t @test_vld3_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T:%.*]] poison, <2 x float> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x float> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) {
return vld3_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> [[TMP10]], <1 x double> [[TMP11]], <1 x double> [[TMP12]], i64 0, ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.float64x1x3_t @test_vld3_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> [[TMP6]], <1 x double> [[TMP7]], <1 x double> [[TMP8]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) {
return vld3_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x3_t @test_vld3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x3_t [[TMP9]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x3_t @test_vld3_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) {
return vld3_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x3_t @test_vld3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x3_t @test_vld3_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T:%.*]] poison, <4 x i16> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) {
return vld3_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x3_t [[TMP16]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld3_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) {
return vld3_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x4_t @test_vld4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x4_t @test_vld4q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
return vld4q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x4_t @test_vld4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x4_t @test_vld4q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) {
return vld4q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x4_t @test_vld4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x4_t @test_vld4q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T:%.*]] poison, <4 x i32> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i32> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) {
return vld4q_lane_u32(a, b, 3);
}
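
For reference, a minimal usage sketch of the intrinsic exercised above. This is illustrative only and not part of the patch; the helper name is hypothetical, and it assumes only <arm_neon.h> on an AArch64 target.

#include <arm_neon.h>

/* Hypothetical helper: overwrite lane 3 of each of the four vectors in
 * `acc` with the four consecutive uint32_t values at `p`. The lane index
 * must be a compile-time constant in [0, 3] for the q-form u32 variant. */
uint32x4x4_t load_u32_into_lane3(const uint32_t *p, uint32x4x4_t acc) {
  return vld4q_lane_u32(p, acc, 3);
}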
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint64x2x4_t @test_vld4q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) {
return vld4q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x16x4_t @test_vld4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int8x16x4_t @test_vld4q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) {
return vld4q_lane_s8(a, b, 15);
}
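
As the hunk above shows, 8-bit element types are the degenerate case: the extracted <16 x i8> values feed @llvm.aarch64.neon.ld4lane.v16i8 directly, with no bitcast round-trip. A minimal caller, again hypothetical and assuming only <arm_neon.h>:

#include <arm_neon.h>

/* Hypothetical helper: refresh lane 15 of each byte vector in `acc`
 * from four consecutive int8_t values at `p` (lane constant in [0, 15]). */
int8x16x4_t load_s8_into_lane15(const int8_t *p, int8x16x4_t acc) {
  return vld4q_lane_s8(p, acc, 15);
}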
-// CHECK-LABEL: define{{.*}} %struct.int16x8x4_t @test_vld4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int16x8x4_t @test_vld4q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) {
return vld4q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x4x4_t @test_vld4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int32x4x4_t @test_vld4q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T:%.*]] poison, <4 x i32> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i32> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i32> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) {
return vld4q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int64x2x4_t @test_vld4q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) {
return vld4q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x8x4_t @test_vld4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0(<8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i64 7, ptr %a)
-// CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float16x8x4_t @test_vld4q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T:%.*]] poison, <8 x half> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x half> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x half> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) {
return vld4q_lane_f16(a, b, 7);
}
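
The f16 hunk above is where the new bitcast lowering is most visible: each <8 x half> operand is first bitcast to <8 x i16>, then through <16 x i8> and back to <8 x half> before the ld4lane call. A minimal caller, hypothetical and assuming AArch64 <arm_neon.h> (which provides float16_t):

#include <arm_neon.h>

/* Hypothetical helper: refresh lane 7 of each half-precision vector in
 * `acc` from four consecutive float16_t values at `p` (lane in [0, 7]). */
float16x8x4_t load_f16_into_lane7(const float16_t *p, float16x8x4_t acc) {
  return vld4q_lane_f16(p, acc, 7);
}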
-// CHECK-LABEL: define{{.*}} %struct.float32x4x4_t @test_vld4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float32x4x4_t @test_vld4q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T:%.*]] poison, <4 x float> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x float> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x float> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) {
return vld4q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <2 x double> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x double>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float64x2x4_t @test_vld4q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T:%.*]] poison, <2 x double> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x double> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x double> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x double> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) {
return vld4q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x4_t @test_vld4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a)
-// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x4_t @test_vld4q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) {
return vld4q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x4_t @test_vld4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a)
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x4_t @test_vld4q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T:%.*]] poison, <8 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
return vld4q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld4q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
return vld4q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x4_t @test_vld4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x4_t @test_vld4_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) {
return vld4_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x4_t @test_vld4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x4_t @test_vld4_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) {
return vld4_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x4_t @test_vld4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x4_t @test_vld4_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T:%.*]] poison, <2 x i32> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i32> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) {
return vld4_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x4_t @test_vld4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.uint64x1x4_t @test_vld4_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_UINT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) {
return vld4_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.int8x8x4_t @test_vld4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int8x8x4_t @test_vld4_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) {
return vld4_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.int16x4x4_t @test_vld4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int16x4x4_t @test_vld4_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) {
return vld4_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.int32x2x4_t @test_vld4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int32x2x4_t @test_vld4_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T:%.*]] poison, <2 x i32> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i32> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i32> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) {
return vld4_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.int64x1x4_t @test_vld4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.int64x1x4_t @test_vld4_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_INT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) {
return vld4_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.float16x4x4_t @test_vld4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0(<4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float16x4x4_t @test_vld4_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T:%.*]] poison, <4 x half> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x half> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x half> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) {
return vld4_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.float32x2x4_t @test_vld4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i64 1, ptr %a)
-// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float32x2x4_t @test_vld4_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T:%.*]] poison, <2 x float> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x float> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x float> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) {
return vld4_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x double> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x double>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], <1 x double> [[TMP15]], i64 0, ptr %a)
-// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.float64x1x4_t @test_vld4_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_3_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_6_24_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[B_SROA_6_24_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x double> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x double> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x double> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_FLOAT64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) {
return vld4_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x4_t @test_vld4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a)
-// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x4_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x4_t @test_vld4_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) {
return vld4_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x4_t @test_vld4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a)
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x4_t @test_vld4_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T:%.*]] poison, <4 x i16> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_1_INSERT]], <4 x i16> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_2_INSERT]], <4 x i16> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) {
return vld4_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x4_t [[TMP19]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld4_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) {
return vld4_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u8(uint8_t *a, uint8x16_t b) {
vst1q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u16(uint16_t *a, uint16x8_t b) {
vst1q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u32(ptr noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK: store i32 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u32(uint32_t *a, uint32x4_t b) {
vst1q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u64(uint64_t *a, uint64x2_t b) {
vst1q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s8(int8_t *a, int8x16_t b) {
vst1q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s16(int16_t *a, int16x8_t b) {
vst1q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s32(ptr noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK: store i32 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s32(int32_t *a, int32x4_t b) {
vst1q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s64(int64_t *a, int64x2_t b) {
vst1q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f16(ptr noundef %a, <8 x half> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
-// CHECK: store half [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
+// CHECK-NEXT: store half [[TMP3]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_f16(float16_t *a, float16x8_t b) {
vst1q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f32(ptr noundef %a, <4 x float> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
-// CHECK: store float [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
+// CHECK-NEXT: store float [[TMP3]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_f32(float32_t *a, float32x4_t b) {
vst1q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f64(ptr noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
-// CHECK: store double [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
+// CHECK-NEXT: store double [[TMP3]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_f64(float64_t *a, float64x2_t b) {
vst1q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p8(ptr noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_p8(poly8_t *a, poly8x16_t b) {
vst1q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p16(ptr noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_p16(poly16_t *a, poly16x8_t b) {
vst1q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p64(ptr noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_p64(poly64_t *a, poly64x2_t b) {
vst1q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_u8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u8(uint8_t *a, uint8x8_t b) {
vst1_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_u16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u16(uint16_t *a, uint16x4_t b) {
vst1_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_u32(ptr noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK: store i32 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u32(uint32_t *a, uint32x2_t b) {
vst1_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_u64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u64(uint64_t *a, uint64x1_t b) {
vst1_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_s8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s8(int8_t *a, int8x8_t b) {
vst1_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_s16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s16(int16_t *a, int16x4_t b) {
vst1_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_s32(ptr noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK: store i32 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s32(int32_t *a, int32x2_t b) {
vst1_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_s64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s64(int64_t *a, int64x1_t b) {
vst1_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_f16(ptr noundef %a, <4 x half> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
-// CHECK: store half [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
+// CHECK-NEXT: store half [[TMP3]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_f16(float16_t *a, float16x4_t b) {
vst1_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_f32(ptr noundef %a, <2 x float> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-// CHECK: store float [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+// CHECK-NEXT: store float [[TMP3]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_f32(float32_t *a, float32x2_t b) {
vst1_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_f64(ptr noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x double> [[TMP2]], i32 0
-// CHECK: store double [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x double> [[TMP2]], i32 0
+// CHECK-NEXT: store double [[TMP3]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_f64(float64_t *a, float64x1_t b) {
vst1_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_p8(ptr noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_p8(poly8_t *a, poly8x8_t b) {
vst1_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_p16(ptr noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_p16(poly16_t *a, poly16x4_t b) {
vst1_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_lane_p64(ptr noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
-// CHECK: store i64 [[TMP3]], ptr %a
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) {
vst1_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) {
vst2q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) {
vst2q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) {
vst2q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) {
vst2q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) {
vst2q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) {
vst2q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) {
vst2q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) {
vst2q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8f16.p0(<8 x half> [[TMP4]], <8 x half> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) {
vst2q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> [[TMP4]], <4 x float> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) {
vst2q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> [[TMP4]], <2 x double> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) {
vst2q_lane_f64(a, b, 1);
}
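
[Editor's note: the f16/f32/f64 variants above bitcast the float vectors to the
same-width integer vectors before the <16 x i8> round trip. A C-level analogue of
that no-op reinterpretation, purely illustrative (the function name is not from
the patch):

    #include <arm_neon.h>

    /* vreinterpretq is the source-level counterpart of the IR bitcasts
       in the checks above: it changes the type, not the bits, and
       compiles to no instructions. */
    float64x2_t roundtrip(float64x2_t v) {
      uint64x2_t bits = vreinterpretq_u64_f64(v); /* same 128 bits, integer view */
      return vreinterpretq_f64_u64(bits);         /* back to the float view */
    }
]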
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) {
vst2q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) {
vst2q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) {
vst2q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) {
vst2_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) {
vst2_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) {
vst2_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) {
vst2_lane_u64(a, b, 0);
}
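
[Editor's note: the 64-bit (d-register) forms follow. With single-element
<1 x i64> vectors the only valid lane index is 0, so vst2_lane_u64 simply stores
the two elements back to back. A hedged sketch, names and values illustrative:

    #include <arm_neon.h>

    void store_pair(uint64_t *out, uint64_t x, uint64_t y) {
      uint64x1x2_t v;
      v.val[0] = vdup_n_u64(x);
      v.val[1] = vdup_n_u64(y);
      vst2_lane_u64(out, v, 0); /* lane must be 0; out[0] = x, out[1] = y */
    }
]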
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) {
vst2_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) {
vst2_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) {
vst2_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) {
vst2_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4f16.p0(<4 x half> [[TMP4]], <4 x half> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) {
vst2_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> [[TMP4]], <2 x float> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) {
vst2_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> [[TMP4]], <1 x double> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) {
vst2_lane_f64(a, b, 0);
}
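
[Editor's note: the f64 d-register case is the odd one out -- the checks above
appear to show SROA leaving a scalar i64 bitcast plus an insertelement into
<1 x i64> undef rather than a straight vector-to-vector bitcast. At the source
level the same reinterpretation is just vreinterpret; illustrative sketch only:

    #include <arm_neon.h>

    uint64x1_t f64_bits(float64x1_t d) {
      return vreinterpret_u64_f64(d); /* <1 x double> viewed as <1 x i64>; no code emitted */
    }
]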
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) {
vst2_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) {
vst2_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) {
vst2_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) {
vst3q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) {
vst3q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
vst3q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) {
vst3q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) {
vst3q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) {
vst3q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) {
vst3q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) {
vst3q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8f16.p0(<8 x half> [[TMP6]], <8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) {
vst3q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> [[TMP6]], <4 x float> [[TMP7]], <4 x float> [[TMP8]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) {
vst3q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x double> [[TMP8]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) {
vst3q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) {
vst3q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) {
vst3q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) {
vst3q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) {
vst3_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) {
vst3_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) {
vst3_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) {
vst3_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) {
vst3_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) {
vst3_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) {
vst3_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) {
vst3_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4f16.p0(<4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) {
vst3_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) {
vst3_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> [[TMP6]], <1 x double> [[TMP7]], <1 x double> [[TMP8]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) {
vst3_lane_f64(a, b, 0);
}
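
A short aside on the 64-bit-element d-register variants (s64, f64, and p64 in this file): those vectors have a single lane, so 0 is the only accepted immediate, and clang rejects any other index at compile time. A hedged sketch under the same assumptions as above; the wrapper name is again illustrative:

#include <arm_neon.h>

void store_only_lane(float64_t *dst, float64x1x3_t v) {
  /* dst[0..2] <- v.val[0][0], v.val[1][0], v.val[2][0] */
  vst3_lane_f64(dst, v, 0);
  /* vst3_lane_f64(dst, v, 1); would be a compile-time error: the lane
     index must lie in the range [0, 0] for float64x1. */
}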
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) {
vst3_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) {
vst3_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) {
vst3_lane_p64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
vst4q_lane_u8(a, b, 15);
}
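
The vst4q_lane tests follow the same pattern with 128-bit registers: one lane from each of the four q-registers is written back to back. A minimal sketch for the byte case, under the same assumptions as the earlier snippets:

#include <arm_neon.h>

void store_last_lane(uint8_t *dst, uint8x16x4_t v) {
  /* dst[0..3] <- v.val[0][15], v.val[1][15], v.val[2][15], v.val[3][15];
     valid lane indices for uint8x16 are [0, 15]. */
  vst4q_lane_u8(dst, v, 15);
}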
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) {
vst4q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) {
vst4q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) {
vst4q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) {
vst4q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) {
vst4q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) {
vst4q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) {
vst4q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) {
vst4q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) {
vst4q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x double>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) {
vst4q_lane_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) {
vst4q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
vst4q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
vst4q_lane_p64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) {
vst4_lane_u8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) {
vst4_lane_u16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) {
vst4_lane_u32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) {
vst4_lane_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) {
vst4_lane_s8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) {
vst4_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) {
vst4_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) {
vst4_lane_s64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) {
vst4_lane_f16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i64 1, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) {
vst4_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_f64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x double>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_0_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_1_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_2_8_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_2_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_4_16_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x double>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x double> [[B_COERCE_FCA_3_EXTRACT]] to i64
+// CHECK-NEXT: [[B_SROA_6_24_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[B_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[B_SROA_2_8_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[B_SROA_4_16_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[B_SROA_6_24_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) {
vst4_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) {
vst4_lane_p8(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], i64 3, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) {
vst4_lane_p16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
-// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_lane_p64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], i64 0, ptr [[A]])
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) {
vst4_lane_p64(a, b, 0);
}
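
A usage sketch, not part of the patch (helper name hypothetical): vst4_lane_f64 takes a float64x1x4_t plus a lane index that must be 0 for one-element vectors, and the checks above show it lowering to @llvm.aarch64.neon.st4lane.v1f64.p0, which stores the selected lane of each of the four registers back-to-back in memory (32 bytes here).

#include <arm_neon.h>

/* Hypothetical helper: stores quad.val[0][0] .. quad.val[3][0]
   contiguously at dst, matching the st4lane call checked above. */
void store_quad_lane0(float64_t *dst, float64x1x4_t quad) {
  vst4_lane_f64(dst, quad, 0);
}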
diff --git a/clang/test/CodeGen/AArch64/neon-misc-constrained.c b/clang/test/CodeGen/AArch64/neon-misc-constrained.c
index e24e129d2bc7d..06ecfd91252a1 100644
--- a/clang/test/CodeGen/AArch64/neon-misc-constrained.c
+++ b/clang/test/CodeGen/AArch64/neon-misc-constrained.c
@@ -1,17 +1,11 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -ffp-exception-behavior=strict \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -S -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-exception-behavior=strict \
-// RUN: -disable-O0-optnone -S -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck --check-prefix=CONSTRAINED %s
// REQUIRES: aarch64-registered-target
@@ -19,42 +13,93 @@
#include <arm_neon.h>
-// COMMON-LABEL: test_vrndaq_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// UNCONSTRAINED: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
-// CONSTRAINED: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %a, metadata !"fpexcept.strict")
-// CHECK-ASM: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-// COMMONIR: ret <2 x double> [[VRNDA1_I]]
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vrndaq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]])
+// UNCONSTRAINED-NEXT: ret <2 x double> [[VRNDA1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vrndaq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> [[VRNDA_I]], metadata !"fpexcept.strict") #[[ATTR2:[0-9]+]]
+// CONSTRAINED-NEXT: ret <2 x double> [[VRNDA1_I]]
+//
float64x2_t test_vrndaq_f64(float64x2_t a) {
return vrndaq_f64(a);
}
-// COMMON-LABEL: test_vrndpq_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// UNCONSTRAINED: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
-// CONSTRAINED: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %a, metadata !"fpexcept.strict")
-// CHECK-ASM: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-// COMMONIR: ret <2 x double> [[VRNDP1_I]]
+
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vrndpq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]])
+// UNCONSTRAINED-NEXT: ret <2 x double> [[VRNDP1_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vrndpq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> [[VRNDP_I]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <2 x double> [[VRNDP1_I]]
+//
float64x2_t test_vrndpq_f64(float64x2_t a) {
return vrndpq_f64(a);
}
-// COMMON-LABEL: test_vsqrtq_f32
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// UNCONSTRAINED: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
-// CONSTRAINED: [[VSQRT_I:%.*]] = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-// COMMONIR: ret <4 x float> [[VSQRT_I]]
+
+// UNCONSTRAINED-LABEL: define dso_local <4 x float> @test_vsqrtq_f32(
+// UNCONSTRAINED-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// UNCONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
+// UNCONSTRAINED-NEXT: ret <4 x float> [[VSQRT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x float> @test_vsqrtq_f32(
+// CONSTRAINED-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> [[TMP2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x float> [[VSQRT_I]]
+//
float32x4_t test_vsqrtq_f32(float32x4_t a) {
return vsqrtq_f32(a);
}
-// COMMON-LABEL: test_vsqrtq_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// UNCONSTRAINED: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
-// CONSTRAINED: [[VSQRT_I:%.*]] = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-// COMMONIR: ret <2 x double> [[VSQRT_I]]
+
+// UNCONSTRAINED-LABEL: define dso_local <2 x double> @test_vsqrtq_f64(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
+// UNCONSTRAINED-NEXT: ret <2 x double> [[VSQRT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <2 x double> @test_vsqrtq_f64(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> [[TMP2]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <2 x double> [[VSQRT_I]]
+//
float64x2_t test_vsqrtq_f64(float64x2_t a) {
return vsqrtq_f64(a);
}
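
A minimal sketch of what the two prefixes distinguish, not part of the patch (wrapper name hypothetical): compiled normally, vsqrtq_f64 lowers to @llvm.sqrt.v2f64 (the UNCONSTRAINED checks); with -ffp-exception-behavior=strict the same call lowers to @llvm.experimental.constrained.sqrt.v2f64 with "round.tonearest"/"fpexcept.strict" metadata (the CONSTRAINED checks).

#include <arm_neon.h>

/* Hypothetical wrapper: one source line, two possible IR lowerings
   depending on the -ffp-exception-behavior mode at compile time. */
float64x2_t sqrt_lanes(float64x2_t v) {
  return vsqrtq_f64(v);
}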
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index 165f33a9f399f..6eadaaf27a210 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -1,2718 +1,4056 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vceqz_s8(
-// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
+//
uint8x8_t test_vceqz_s8(int8x8_t a) {
return vceqz_s8(a);
}
-// CHECK-LABEL: @test_vceqz_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
+//
uint16x4_t test_vceqz_s16(int16x4_t a) {
return vceqz_s16(a);
}
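
A portable sketch of the mask semantics these checks encode, not part of the patch (helper name hypothetical): the round-trip bitcasts (<4 x i16> to <8 x i8> and back) are value-preserving, and the icmp-plus-sext pair produces the usual NEON all-ones/all-zeros lane mask, so vceqz_s16(a) behaves like a compare against a zero vector:

#include <arm_neon.h>

/* Hypothetical equivalent of vceqz_s16: each result lane is 0xFFFF
   where the input lane is zero, and 0x0000 otherwise. */
uint16x4_t ceqz_by_hand(int16x4_t a) {
  return vceq_s16(a, vdup_n_s16(0));
}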
-// CHECK-LABEL: @test_vceqz_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
+//
uint32x2_t test_vceqz_s32(int32x2_t a) {
return vceqz_s32(a);
}
-// CHECK-LABEL: @test_vceqz_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
+//
uint64x1_t test_vceqz_s64(int64x1_t a) {
return vceqz_s64(a);
}
-// CHECK-LABEL: @test_vceqz_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
+//
uint64x1_t test_vceqz_u64(uint64x1_t a) {
return vceqz_u64(a);
}
-// CHECK-LABEL: @test_vceqz_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <1 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
+//
uint64x1_t test_vceqz_p64(poly64x1_t a) {
return vceqz_p64(a);
}
-// CHECK-LABEL: @test_vceqzq_s8(
-// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
+//
uint8x16_t test_vceqzq_s8(int8x16_t a) {
return vceqzq_s8(a);
}
-// CHECK-LABEL: @test_vceqzq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
+//
uint16x8_t test_vceqzq_s16(int16x8_t a) {
return vceqzq_s16(a);
}
-// CHECK-LABEL: @test_vceqzq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
+//
uint32x4_t test_vceqzq_s32(int32x4_t a) {
return vceqzq_s32(a);
}
-// CHECK-LABEL: @test_vceqzq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
+//
uint64x2_t test_vceqzq_s64(int64x2_t a) {
return vceqzq_s64(a);
}
-// CHECK-LABEL: @test_vceqz_u8(
-// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
+//
uint8x8_t test_vceqz_u8(uint8x8_t a) {
return vceqz_u8(a);
}
-// CHECK-LABEL: @test_vceqz_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
+//
uint16x4_t test_vceqz_u16(uint16x4_t a) {
return vceqz_u16(a);
}
-// CHECK-LABEL: @test_vceqz_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <2 x i32> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
+//
uint32x2_t test_vceqz_u32(uint32x2_t a) {
return vceqz_u32(a);
}
-// CHECK-LABEL: @test_vceqzq_u8(
-// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
+//
uint8x16_t test_vceqzq_u8(uint8x16_t a) {
return vceqzq_u8(a);
}
-// CHECK-LABEL: @test_vceqzq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <8 x i16> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
+//
uint16x8_t test_vceqzq_u16(uint16x8_t a) {
return vceqzq_u16(a);
}
-// CHECK-LABEL: @test_vceqzq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <4 x i32> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
+//
uint32x4_t test_vceqzq_u32(uint32x4_t a) {
return vceqzq_u32(a);
}
-// CHECK-LABEL: @test_vceqzq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
+//
uint64x2_t test_vceqzq_u64(uint64x2_t a) {
return vceqzq_u64(a);
}
-// CHECK-LABEL: @test_vceqz_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oeq <2 x float> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
+//
uint32x2_t test_vceqz_f32(float32x2_t a) {
return vceqz_f32(a);
}
-// CHECK-LABEL: @test_vceqz_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oeq <1 x double> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
+//
uint64x1_t test_vceqz_f64(float64x1_t a) {
return vceqz_f64(a);
}
-// CHECK-LABEL: @test_vceqzq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x float> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
+//
uint32x4_t test_vceqzq_f32(float32x4_t a) {
return vceqzq_f32(a);
}
-// CHECK-LABEL: @test_vceqz_p8(
-// CHECK: [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
+//
uint8x8_t test_vceqz_p8(poly8x8_t a) {
return vceqz_p8(a);
}
-// CHECK-LABEL: @test_vceqzq_p8(
-// CHECK: [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
+//
uint8x16_t test_vceqzq_p8(poly8x16_t a) {
return vceqzq_p8(a);
}
-// CHECK-LABEL: @test_vceqzq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oeq <2 x double> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
+//
uint64x2_t test_vceqzq_f64(float64x2_t a) {
return vceqzq_f64(a);
}
-// CHECK-LABEL: @test_vceqzq_p64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp eq <2 x i64> %a, zeroinitializer
-// CHECK: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCEQZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
+//
uint64x2_t test_vceqzq_p64(poly64x2_t a) {
return vceqzq_p64(a);
}
-// CHECK-LABEL: @test_vcgez_s8(
-// CHECK: [[TMP0:%.*]] = icmp sge <8 x i8> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcgez_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCGEZ_I]]
+//
uint8x8_t test_vcgez_s8(int8x8_t a) {
return vcgez_s8(a);
}
-// CHECK-LABEL: @test_vcgez_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <4 x i16> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcgez_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCGEZ_I]]
+//
uint16x4_t test_vcgez_s16(int16x4_t a) {
return vcgez_s16(a);
}
-// CHECK-LABEL: @test_vcgez_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <2 x i32> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgez_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCGEZ_I]]
+//
uint32x2_t test_vcgez_s32(int32x2_t a) {
return vcgez_s32(a);
}
-// CHECK-LABEL: @test_vcgez_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <1 x i64> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgez_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCGEZ_I]]
+//
uint64x1_t test_vcgez_s64(int64x1_t a) {
return vcgez_s64(a);
}
-// CHECK-LABEL: @test_vcgezq_s8(
-// CHECK: [[TMP0:%.*]] = icmp sge <16 x i8> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgezq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCGEZ_I]]
+//
uint8x16_t test_vcgezq_s8(int8x16_t a) {
return vcgezq_s8(a);
}
-// CHECK-LABEL: @test_vcgezq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <8 x i16> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgezq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCGEZ_I]]
+//
uint16x8_t test_vcgezq_s16(int16x8_t a) {
return vcgezq_s16(a);
}
-// CHECK-LABEL: @test_vcgezq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <4 x i32> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgezq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCGEZ_I]]
+//
uint32x4_t test_vcgezq_s32(int32x4_t a) {
return vcgezq_s32(a);
}
-// CHECK-LABEL: @test_vcgezq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sge <2 x i64> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgezq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCGEZ_I]]
+//
uint64x2_t test_vcgezq_s64(int64x2_t a) {
return vcgezq_s64(a);
}
-// CHECK-LABEL: @test_vcgez_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oge <2 x float> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgez_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCGEZ_I]]
+//
uint32x2_t test_vcgez_f32(float32x2_t a) {
return vcgez_f32(a);
}
-// CHECK-LABEL: @test_vcgez_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oge <1 x double> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgez_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <1 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCGEZ_I]]
+//
uint64x1_t test_vcgez_f64(float64x1_t a) {
return vcgez_f64(a);
}
-// CHECK-LABEL: @test_vcgezq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oge <4 x float> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgezq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <4 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCGEZ_I]]
+//
uint32x4_t test_vcgezq_f32(float32x4_t a) {
return vcgezq_f32(a);
}
-// CHECK-LABEL: @test_vcgezq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp oge <2 x double> %a, zeroinitializer
-// CHECK: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCGEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgezq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCGEZ_I]]
+//
uint64x2_t test_vcgezq_f64(float64x2_t a) {
return vcgezq_f64(a);
}
-// CHECK-LABEL: @test_vclez_s8(
-// CHECK: [[TMP0:%.*]] = icmp sle <8 x i8> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vclez_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCLEZ_I]]
+//
uint8x8_t test_vclez_s8(int8x8_t a) {
return vclez_s8(a);
}
-// CHECK-LABEL: @test_vclez_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <4 x i16> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vclez_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCLEZ_I]]
+//
uint16x4_t test_vclez_s16(int16x4_t a) {
return vclez_s16(a);
}
-// CHECK-LABEL: @test_vclez_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <2 x i32> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclez_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCLEZ_I]]
+//
uint32x2_t test_vclez_s32(int32x2_t a) {
return vclez_s32(a);
}
-// CHECK-LABEL: @test_vclez_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <1 x i64> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vclez_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCLEZ_I]]
+//
uint64x1_t test_vclez_s64(int64x1_t a) {
return vclez_s64(a);
}
-// CHECK-LABEL: @test_vclezq_s8(
-// CHECK: [[TMP0:%.*]] = icmp sle <16 x i8> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vclezq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCLEZ_I]]
+//
uint8x16_t test_vclezq_s8(int8x16_t a) {
return vclezq_s8(a);
}
-// CHECK-LABEL: @test_vclezq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <8 x i16> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vclezq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCLEZ_I]]
+//
uint16x8_t test_vclezq_s16(int16x8_t a) {
return vclezq_s16(a);
}
-// CHECK-LABEL: @test_vclezq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <4 x i32> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclezq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCLEZ_I]]
+//
uint32x4_t test_vclezq_s32(int32x4_t a) {
return vclezq_s32(a);
}
-// CHECK-LABEL: @test_vclezq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sle <2 x i64> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vclezq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCLEZ_I]]
+//
uint64x2_t test_vclezq_s64(int64x2_t a) {
return vclezq_s64(a);
}
-// CHECK-LABEL: @test_vclez_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ole <2 x float> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclez_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <2 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCLEZ_I]]
+//
uint32x2_t test_vclez_f32(float32x2_t a) {
return vclez_f32(a);
}
-// CHECK-LABEL: @test_vclez_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ole <1 x double> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vclez_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <1 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCLEZ_I]]
+//
uint64x1_t test_vclez_f64(float64x1_t a) {
return vclez_f64(a);
}
-// CHECK-LABEL: @test_vclezq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ole <4 x float> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclezq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <4 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCLEZ_I]]
+//
uint32x4_t test_vclezq_f32(float32x4_t a) {
return vclezq_f32(a);
}
-// CHECK-LABEL: @test_vclezq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ole <2 x double> %a, zeroinitializer
-// CHECK: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCLEZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vclezq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <2 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCLEZ_I]]
+//
uint64x2_t test_vclezq_f64(float64x2_t a) {
return vclezq_f64(a);
}
-// CHECK-LABEL: @test_vcgtz_s8(
-// CHECK: [[TMP0:%.*]] = icmp sgt <8 x i8> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcgtz_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCGTZ_I]]
+//
uint8x8_t test_vcgtz_s8(int8x8_t a) {
return vcgtz_s8(a);
}
-// CHECK-LABEL: @test_vcgtz_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <4 x i16> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcgtz_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCGTZ_I]]
+//
uint16x4_t test_vcgtz_s16(int16x4_t a) {
return vcgtz_s16(a);
}
-// CHECK-LABEL: @test_vcgtz_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <2 x i32> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgtz_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCGTZ_I]]
+//
uint32x2_t test_vcgtz_s32(int32x2_t a) {
return vcgtz_s32(a);
}
-// CHECK-LABEL: @test_vcgtz_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <1 x i64> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgtz_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCGTZ_I]]
+//
uint64x1_t test_vcgtz_s64(int64x1_t a) {
return vcgtz_s64(a);
}
-// CHECK-LABEL: @test_vcgtzq_s8(
-// CHECK: [[TMP0:%.*]] = icmp sgt <16 x i8> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcgtzq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCGTZ_I]]
+//
uint8x16_t test_vcgtzq_s8(int8x16_t a) {
return vcgtzq_s8(a);
}
-// CHECK-LABEL: @test_vcgtzq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <8 x i16> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcgtzq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCGTZ_I]]
+//
uint16x8_t test_vcgtzq_s16(int16x8_t a) {
return vcgtzq_s16(a);
}
-// CHECK-LABEL: @test_vcgtzq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <4 x i32> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgtzq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCGTZ_I]]
+//
uint32x4_t test_vcgtzq_s32(int32x4_t a) {
return vcgtzq_s32(a);
}
-// CHECK-LABEL: @test_vcgtzq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp sgt <2 x i64> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgtzq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCGTZ_I]]
+//
uint64x2_t test_vcgtzq_s64(int64x2_t a) {
return vcgtzq_s64(a);
}
-// CHECK-LABEL: @test_vcgtz_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ogt <2 x float> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcgtz_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCGTZ_I]]
+//
uint32x2_t test_vcgtz_f32(float32x2_t a) {
return vcgtz_f32(a);
}
-// CHECK-LABEL: @test_vcgtz_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ogt <1 x double> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcgtz_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <1 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCGTZ_I]]
+//
uint64x1_t test_vcgtz_f64(float64x1_t a) {
return vcgtz_f64(a);
}
-// CHECK-LABEL: @test_vcgtzq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x float> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcgtzq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCGTZ_I]]
+//
uint32x4_t test_vcgtzq_f32(float32x4_t a) {
return vcgtzq_f32(a);
}
-// CHECK-LABEL: @test_vcgtzq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp ogt <2 x double> %a, zeroinitializer
-// CHECK: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCGTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcgtzq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCGTZ_I]]
+//
uint64x2_t test_vcgtzq_f64(float64x2_t a) {
return vcgtzq_f64(a);
}
-// CHECK-LABEL: @test_vcltz_s8(
-// CHECK: [[TMP0:%.*]] = icmp slt <8 x i8> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcltz_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VCLTZ_I]]
+//
uint8x8_t test_vcltz_s8(int8x8_t a) {
return vcltz_s8(a);
}
-// CHECK-LABEL: @test_vcltz_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <4 x i16> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcltz_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VCLTZ_I]]
+//
uint16x4_t test_vcltz_s16(int16x4_t a) {
return vcltz_s16(a);
}
-// CHECK-LABEL: @test_vcltz_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <2 x i32> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcltz_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCLTZ_I]]
+//
uint32x2_t test_vcltz_s32(int32x2_t a) {
return vcltz_s32(a);
}
-// CHECK-LABEL: @test_vcltz_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <1 x i64> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcltz_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <1 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCLTZ_I]]
+//
uint64x1_t test_vcltz_s64(int64x1_t a) {
return vcltz_s64(a);
}
-// CHECK-LABEL: @test_vcltzq_s8(
-// CHECK: [[TMP0:%.*]] = icmp slt <16 x i8> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcltzq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VCLTZ_I]]
+//
uint8x16_t test_vcltzq_s8(int8x16_t a) {
return vcltzq_s8(a);
}
-// CHECK-LABEL: @test_vcltzq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <8 x i16> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vcltzq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VCLTZ_I]]
+//
uint16x8_t test_vcltzq_s16(int16x8_t a) {
return vcltzq_s16(a);
}
-// CHECK-LABEL: @test_vcltzq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <4 x i32> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcltzq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCLTZ_I]]
+//
uint32x4_t test_vcltzq_s32(int32x4_t a) {
return vcltzq_s32(a);
}
-// CHECK-LABEL: @test_vcltzq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = icmp slt <2 x i64> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcltzq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCLTZ_I]]
+//
uint64x2_t test_vcltzq_s64(int64x2_t a) {
return vcltzq_s64(a);
}
-// CHECK-LABEL: @test_vcltz_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp olt <2 x float> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcltz_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <2 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCLTZ_I]]
+//
uint32x2_t test_vcltz_f32(float32x2_t a) {
return vcltz_f32(a);
}
-// CHECK-LABEL: @test_vcltz_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp olt <1 x double> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP1]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcltz_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <1 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VCLTZ_I]]
+//
uint64x1_t test_vcltz_f64(float64x1_t a) {
return vcltz_f64(a);
}
-// CHECK-LABEL: @test_vcltzq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp olt <4 x float> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcltzq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCLTZ_I]]
+//
uint32x4_t test_vcltzq_f32(float32x4_t a) {
return vcltzq_f32(a);
}
-// CHECK-LABEL: @test_vcltzq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = fcmp olt <2 x double> %a, zeroinitializer
-// CHECK: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VCLTZ_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcltzq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <2 x double> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VCLTZ_I]]
+//
uint64x2_t test_vcltzq_f64(float64x2_t a) {
return vcltzq_f64(a);
}
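
For reference, every compare-against-zero intrinsic above lowers to the same shape: an icmp/fcmp of the operand against zeroinitializer, sign-extended lane-wise back to the operand width. The extra bitcast pairs in the regenerated checks (and the i64 insertelement round trip for <1 x double>) are value-preserving and should fold away later in the pipeline. A minimal C sketch of the observable semantics of vclez_f32, assuming only standard arm_neon.h intrinsics (the helper name clez_f32_ref is ours, not part of the patch):

  #include <arm_neon.h>

  /* clez_f32_ref is an illustrative name: each result lane is all-ones when
     the corresponding lane of a is <= 0.0f, i.e. the sext-of-fcmp pattern
     the regenerated checks verify for vclez_f32. NaN lanes compare false
     and produce 0, matching the ordered fcmp ole above. */
  static uint32x2_t clez_f32_ref(float32x2_t a) {
    uint32x2_t r = vdup_n_u32(0);
    r = vset_lane_u32((vget_lane_f32(a, 0) <= 0.0f) ? 0xFFFFFFFFu : 0u, r, 0);
    r = vset_lane_u32((vget_lane_f32(a, 1) <= 0.0f) ? 0xFFFFFFFFu : 0u, r, 1);
    return r;  /* observably identical to vclez_f32(a) */
  }
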
-// CHECK-LABEL: @test_vrev16_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev16_s8(int8x8_t a) {
return vrev16_s8(a);
}
-// CHECK-LABEL: @test_vrev16_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev16_u8(uint8x8_t a) {
return vrev16_u8(a);
}
-// CHECK-LABEL: @test_vrev16_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev16_p8(poly8x8_t a) {
return vrev16_p8(a);
}
-// CHECK-LABEL: @test_vrev16q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev16q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev16q_s8(int8x16_t a) {
return vrev16q_s8(a);
}
-// CHECK-LABEL: @test_vrev16q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev16q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
return vrev16q_u8(a);
}
-// CHECK-LABEL: @test_vrev16q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev16q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
return vrev16q_p8(a);
}
-// CHECK-LABEL: @test_vrev32_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev32_s8(int8x8_t a) {
return vrev32_s8(a);
}
-// CHECK-LABEL: @test_vrev32_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vrev32_s16(int16x4_t a) {
return vrev32_s16(a);
}
-// CHECK-LABEL: @test_vrev32_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev32_u8(uint8x8_t a) {
return vrev32_u8(a);
}
-// CHECK-LABEL: @test_vrev32_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vrev32_u16(uint16x4_t a) {
return vrev32_u16(a);
}
-// CHECK-LABEL: @test_vrev32_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev32_p8(poly8x8_t a) {
return vrev32_p8(a);
}
-// CHECK-LABEL: @test_vrev32_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vrev32_p16(poly16x4_t a) {
return vrev32_p16(a);
}
-// CHECK-LABEL: @test_vrev32q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev32q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev32q_s8(int8x16_t a) {
return vrev32q_s8(a);
}
-// CHECK-LABEL: @test_vrev32q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev32q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vrev32q_s16(int16x8_t a) {
return vrev32q_s16(a);
}
-// CHECK-LABEL: @test_vrev32q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev32q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
return vrev32q_u8(a);
}
-// CHECK-LABEL: @test_vrev32q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev32q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
return vrev32q_u16(a);
}
-// CHECK-LABEL: @test_vrev32q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev32q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
return vrev32q_p8(a);
}
-// CHECK-LABEL: @test_vrev32q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev32q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
return vrev32q_p16(a);
}
-// CHECK-LABEL: @test_vrev64_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev64_s8(int8x8_t a) {
return vrev64_s8(a);
}
-// CHECK-LABEL: @test_vrev64_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vrev64_s16(int16x4_t a) {
return vrev64_s16(a);
}
-// CHECK-LABEL: @test_vrev64_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrev64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vrev64_s32(int32x2_t a) {
return vrev64_s32(a);
}
-// CHECK-LABEL: @test_vrev64_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev64_u8(uint8x8_t a) {
return vrev64_u8(a);
}
-// CHECK-LABEL: @test_vrev64_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vrev64_u16(uint16x4_t a) {
return vrev64_u16(a);
}
-// CHECK-LABEL: @test_vrev64_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrev64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vrev64_u32(uint32x2_t a) {
return vrev64_u32(a);
}
-// CHECK-LABEL: @test_vrev64_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrev64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev64_p8(poly8x8_t a) {
return vrev64_p8(a);
}
-// CHECK-LABEL: @test_vrev64_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vrev64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vrev64_p16(poly16x4_t a) {
return vrev64_p16(a);
}
-// CHECK-LABEL: @test_vrev64_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrev64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vrev64_f32(float32x2_t a) {
return vrev64_f32(a);
}
-// CHECK-LABEL: @test_vrev64q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev64q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev64q_s8(int8x16_t a) {
return vrev64q_s8(a);
}
-// CHECK-LABEL: @test_vrev64q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev64q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vrev64q_s16(int16x8_t a) {
return vrev64q_s16(a);
}
-// CHECK-LABEL: @test_vrev64q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrev64q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vrev64q_s32(int32x4_t a) {
return vrev64q_s32(a);
}
-// CHECK-LABEL: @test_vrev64q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev64q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
return vrev64q_u8(a);
}
-// CHECK-LABEL: @test_vrev64q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev64q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
return vrev64q_u16(a);
}
-// CHECK-LABEL: @test_vrev64q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrev64q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
return vrev64q_u32(a);
}
-// CHECK-LABEL: @test_vrev64q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrev64q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
return vrev64q_p8(a);
}
-// CHECK-LABEL: @test_vrev64q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vrev64q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
return vrev64q_p16(a);
}
-// CHECK-LABEL: @test_vrev64q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrev64q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vrev64q_f32(float32x4_t a) {
return vrev64q_f32(a);
}
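
The vrev family is pure data movement: a shufflevector that reverses lanes inside each 16-, 32-, or 64-bit chunk, so the regenerated checks differ from the old ones only in value naming. A small runnable illustration of the <1,0,3,2> mask checked for vrev32_s16 (our example, not taken from the test suite):

  #include <arm_neon.h>
  #include <stdio.h>

  int main(void) {
    /* vrev32_s16 reverses the 16-bit lanes inside each 32-bit chunk,
       which is exactly the <1,0,3,2> shufflevector mask checked above. */
    int16_t in[4] = {1, 2, 3, 4};
    int16x4_t r = vrev32_s16(vld1_s16(in));
    int16_t out[4];
    vst1_s16(out, r);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* prints: 2 1 4 3 */
    return 0;
  }
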
-// CHECK-LABEL: @test_vpaddl_s8(
-// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a)
-// CHECK: ret <4 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpaddl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i16> [[VPADDL_I]]
+//
int16x4_t test_vpaddl_s8(int8x8_t a) {
return vpaddl_s8(a);
}
-// CHECK-LABEL: @test_vpaddl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %a)
-// CHECK: ret <2 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpaddl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADDL1_I]]
+//
int32x2_t test_vpaddl_s16(int16x4_t a) {
return vpaddl_s16(a);
}
-// CHECK-LABEL: @test_vpaddl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %a)
-// CHECK: ret <1 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vpaddl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADDL1_I]]
+//
int64x1_t test_vpaddl_s32(int32x2_t a) {
return vpaddl_s32(a);
}
-// CHECK-LABEL: @test_vpaddl_u8(
-// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a)
-// CHECK: ret <4 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpaddl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i16> [[VPADDL_I]]
+//
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
return vpaddl_u8(a);
}
-// CHECK-LABEL: @test_vpaddl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a)
-// CHECK: ret <2 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpaddl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADDL1_I]]
+//
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
return vpaddl_u16(a);
}
-// CHECK-LABEL: @test_vpaddl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a)
-// CHECK: ret <1 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vpaddl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADDL1_I]]
+//
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
return vpaddl_u32(a);
}
-// CHECK-LABEL: @test_vpaddlq_s8(
-// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a)
-// CHECK: ret <8 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpaddlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i16> [[VPADDL_I]]
+//
int16x8_t test_vpaddlq_s8(int8x16_t a) {
return vpaddlq_s8(a);
}
-// CHECK-LABEL: @test_vpaddlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %a)
-// CHECK: ret <4 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpaddlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADDL1_I]]
+//
int32x4_t test_vpaddlq_s16(int16x8_t a) {
return vpaddlq_s16(a);
}
-// CHECK-LABEL: @test_vpaddlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %a)
-// CHECK: ret <2 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpaddlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADDL1_I]]
+//
int64x2_t test_vpaddlq_s32(int32x4_t a) {
return vpaddlq_s32(a);
}
-// CHECK-LABEL: @test_vpaddlq_u8(
-// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a)
-// CHECK: ret <8 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpaddlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i16> [[VPADDL_I]]
+//
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
return vpaddlq_u8(a);
}
-// CHECK-LABEL: @test_vpaddlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a)
-// CHECK: ret <4 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpaddlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADDL1_I]]
+//
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
return vpaddlq_u16(a);
}
-// CHECK-LABEL: @test_vpaddlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a)
-// CHECK: ret <2 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpaddlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADDL1_I]]
+//
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
return vpaddlq_u32(a);
}
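
vpaddl adds adjacent lane pairs of the source into lanes of twice the width; in the regenerated checks the saddlp/uaddlp call is the entire operation, and the leading bitcast pair is a value-preserving round trip. A short sketch, again using only standard arm_neon.h intrinsics (paddl_demo is our name):

  #include <arm_neon.h>

  /* vpaddl_s8 adds adjacent pairs of 8-bit lanes into widened 16-bit lanes;
     the @llvm.aarch64.neon.saddlp call checked above is the whole operation. */
  static int16x4_t paddl_demo(void) {
    const int8_t in[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    return vpaddl_s8(vld1_s8(in));  /* lanes: {3, 7, 11, 15} */
  }
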
-// CHECK-LABEL: @test_vpadal_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b)
-// CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a
-// CHECK: ret <4 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpadal_s8(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[B]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
return vpadal_s8(a, b);
}
-// CHECK-LABEL: @test_vpadal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %b)
-// CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpadal_s16(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
return vpadal_s16(a, b);
}
-// CHECK-LABEL: @test_vpadal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %b)
-// CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vpadal_s32(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <1 x i64> [[TMP3]]
+//
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
return vpadal_s32(a, b);
}
-// CHECK-LABEL: @test_vpadal_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b)
-// CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a
-// CHECK: ret <4 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vpadal_u8(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[B]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
return vpadal_u8(a, b);
}
-// CHECK-LABEL: @test_vpadal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b)
-// CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a
-// CHECK: ret <2 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vpadal_u16(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
return vpadal_u16(a, b);
}
-// CHECK-LABEL: @test_vpadal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b)
-// CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vpadal_u32(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <1 x i64> [[TMP3]]
+//
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
return vpadal_u32(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b)
-// CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a
-// CHECK: ret <8 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpadalq_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[B]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
return vpadalq_s8(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %b)
-// CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpadalq_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
return vpadalq_s16(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %b)
-// CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpadalq_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
return vpadalq_s32(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b)
-// CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a
-// CHECK: ret <8 x i16> [[TMP1]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vpadalq_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[B]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
return vpadalq_u8(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b)
-// CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a
-// CHECK: ret <4 x i32> [[TMP2]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vpadalq_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
return vpadalq_u16(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b)
-// CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vpadalq_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
return vpadalq_u32(a, b);
}
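
For readers skimming these checks, a minimal standalone sketch of the family under test (not part of the patch; assumes an AArch64 clang with <arm_neon.h>). vpadal/vpadalq pairwise-add adjacent lanes of the second operand, widen, and accumulate onto the first, which is why each check pairs a saddlp/uaddlp call with a plain add:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int32x2_t acc = {100, 200};
  int16x4_t v   = {1, 2, 3, 4};
  /* acc[0] += v[0]+v[1]; acc[1] += v[2]+v[3] */
  int32x2_t r = vpadal_s16(acc, v);
  printf("%d %d\n", vget_lane_s32(r, 0), vget_lane_s32(r, 1)); /* 103 207 */
  return 0;
}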
-// CHECK-LABEL: @test_vqabs_s8(
-// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VQABS_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqabs_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VQABS_V_I]]
+//
int8x8_t test_vqabs_s8(int8x8_t a) {
return vqabs_s8(a);
}
-// CHECK-LABEL: @test_vqabsq_s8(
-// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqabsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VQABSQ_V_I]]
+//
int8x16_t test_vqabsq_s8(int8x16_t a) {
return vqabsq_s8(a);
}
-// CHECK-LABEL: @test_vqabs_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %a)
-// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQABS_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqabs_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[VQABS_V_I]])
+// CHECK-NEXT: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqabs_s16(int16x4_t a) {
return vqabs_s16(a);
}
-// CHECK-LABEL: @test_vqabsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %a)
-// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQABSQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqabsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> [[VQABSQ_V_I]])
+// CHECK-NEXT: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vqabsq_s16(int16x8_t a) {
return vqabsq_s16(a);
}
-// CHECK-LABEL: @test_vqabs_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %a)
-// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQABS_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqabs_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> [[VQABS_V_I]])
+// CHECK-NEXT: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqabs_s32(int32x2_t a) {
return vqabs_s32(a);
}
-// CHECK-LABEL: @test_vqabsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %a)
-// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQABSQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqabsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> [[VQABSQ_V_I]])
+// CHECK-NEXT: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vqabsq_s32(int32x4_t a) {
return vqabsq_s32(a);
}
-// CHECK-LABEL: @test_vqabsq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> %a)
-// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQABSQ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqabsq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> [[VQABSQ_V_I]])
+// CHECK-NEXT: [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
int64x2_t test_vqabsq_s64(int64x2_t a) {
return vqabsq_s64(a);
}
-// CHECK-LABEL: @test_vqneg_s8(
-// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VQNEG_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqneg_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VQNEG_V_I]]
+//
int8x8_t test_vqneg_s8(int8x8_t a) {
return vqneg_s8(a);
}
-// CHECK-LABEL: @test_vqnegq_s8(
-// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqnegq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VQNEGQ_V_I]]
+//
int8x16_t test_vqnegq_s8(int8x16_t a) {
return vqnegq_s8(a);
}
-// CHECK-LABEL: @test_vqneg_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %a)
-// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQNEG_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqneg_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[VQNEG_V_I]])
+// CHECK-NEXT: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqneg_s16(int16x4_t a) {
return vqneg_s16(a);
}
-// CHECK-LABEL: @test_vqnegq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %a)
-// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqnegq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]])
+// CHECK-NEXT: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vqnegq_s16(int16x8_t a) {
return vqnegq_s16(a);
}
-// CHECK-LABEL: @test_vqneg_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %a)
-// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQNEG_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqneg_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> [[VQNEG_V_I]])
+// CHECK-NEXT: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqneg_s32(int32x2_t a) {
return vqneg_s32(a);
}
-// CHECK-LABEL: @test_vqnegq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %a)
-// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqnegq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]])
+// CHECK-NEXT: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vqnegq_s32(int32x4_t a) {
return vqnegq_s32(a);
}
-// CHECK-LABEL: @test_vqnegq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> %a)
-// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQNEGQ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vqnegq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> [[VQNEGQ_V_I]])
+// CHECK-NEXT: [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP1]]
+//
int64x2_t test_vqnegq_s64(int64x2_t a) {
return vqnegq_s64(a);
}
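
Similarly for the saturating unary ops checked above, a small sketch (again not part of the patch, assuming an AArch64 target) of why sqneg is not a plain subtract from zero: vqneg_s16 clamps INT16_MIN to INT16_MAX instead of wrapping:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int16x4_t v = {INT16_MIN, -1, 0, 5};
  int16x4_t r = vqneg_s16(v); /* saturating negate */
  printf("%d %d %d %d\n", vget_lane_s16(r, 0), vget_lane_s16(r, 1),
         vget_lane_s16(r, 2), vget_lane_s16(r, 3)); /* 32767 1 0 -5 */
  return 0;
}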
-// CHECK-LABEL: @test_vneg_s8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vneg_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vneg_s8(int8x8_t a) {
return vneg_s8(a);
}
-// CHECK-LABEL: @test_vnegq_s8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vnegq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vnegq_s8(int8x16_t a) {
return vnegq_s8(a);
}
-// CHECK-LABEL: @test_vneg_s16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vneg_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vneg_s16(int16x4_t a) {
return vneg_s16(a);
}
-// CHECK-LABEL: @test_vnegq_s16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vnegq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vnegq_s16(int16x8_t a) {
return vnegq_s16(a);
}
-// CHECK-LABEL: @test_vneg_s32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vneg_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vneg_s32(int32x2_t a) {
return vneg_s32(a);
}
-// CHECK-LABEL: @test_vnegq_s32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vnegq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vnegq_s32(int32x4_t a) {
return vnegq_s32(a);
}
-// CHECK-LABEL: @test_vnegq_s64(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, %a
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vnegq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vnegq_s64(int64x2_t a) {
return vnegq_s64(a);
}
-// CHECK-LABEL: @test_vneg_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %a
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vneg_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[A]]
+// CHECK-NEXT: ret <2 x float> [[FNEG_I]]
+//
float32x2_t test_vneg_f32(float32x2_t a) {
return vneg_f32(a);
}
-// CHECK-LABEL: @test_vnegq_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %a
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vnegq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[A]]
+// CHECK-NEXT: ret <4 x float> [[FNEG_I]]
+//
float32x4_t test_vnegq_f32(float32x4_t a) {
return vnegq_f32(a);
}
-// CHECK-LABEL: @test_vnegq_f64(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x double> %a
-// CHECK: ret <2 x double> [[SUB_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vnegq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[A]]
+// CHECK-NEXT: ret <2 x double> [[FNEG_I]]
+//
float64x2_t test_vnegq_f64(float64x2_t a) {
return vnegq_f64(a);
}
-// CHECK-LABEL: @test_vabs_s8(
-// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VABS_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vabs_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VABS_I]]
+//
int8x8_t test_vabs_s8(int8x8_t a) {
return vabs_s8(a);
}
-// CHECK-LABEL: @test_vabsq_s8(
-// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VABS_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vabsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VABS_I]]
+//
int8x16_t test_vabsq_s8(int8x16_t a) {
return vabsq_s8(a);
}
-// CHECK-LABEL: @test_vabs_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %a)
-// CHECK: ret <4 x i16> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vabs_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> [[VABS_I]])
+// CHECK-NEXT: ret <4 x i16> [[VABS1_I]]
+//
int16x4_t test_vabs_s16(int16x4_t a) {
return vabs_s16(a);
}
-// CHECK-LABEL: @test_vabsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %a)
-// CHECK: ret <8 x i16> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vabsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> [[VABS_I]])
+// CHECK-NEXT: ret <8 x i16> [[VABS1_I]]
+//
int16x8_t test_vabsq_s16(int16x8_t a) {
return vabsq_s16(a);
}
-// CHECK-LABEL: @test_vabs_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vabs_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> [[VABS_I]])
+// CHECK-NEXT: ret <2 x i32> [[VABS1_I]]
+//
int32x2_t test_vabs_s32(int32x2_t a) {
return vabs_s32(a);
}
-// CHECK-LABEL: @test_vabsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vabsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> [[VABS_I]])
+// CHECK-NEXT: ret <4 x i32> [[VABS1_I]]
+//
int32x4_t test_vabsq_s32(int32x4_t a) {
return vabsq_s32(a);
}
-// CHECK-LABEL: @test_vabsq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> %a)
-// CHECK: ret <2 x i64> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vabsq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> [[VABS_I]])
+// CHECK-NEXT: ret <2 x i64> [[VABS1_I]]
+//
int64x2_t test_vabsq_s64(int64x2_t a) {
return vabsq_s64(a);
}
-// CHECK-LABEL: @test_vabs_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vabs_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]])
+// CHECK-NEXT: ret <2 x float> [[VABS1_I]]
+//
float32x2_t test_vabs_f32(float32x2_t a) {
return vabs_f32(a);
}
-// CHECK-LABEL: @test_vabsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vabsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]])
+// CHECK-NEXT: ret <4 x float> [[VABS1_I]]
+//
float32x4_t test_vabsq_f32(float32x4_t a) {
return vabsq_f32(a);
}
-// CHECK-LABEL: @test_vabsq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VABS1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vabsq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[VABS_I]])
+// CHECK-NEXT: ret <2 x double> [[VABS1_I]]
+//
float64x2_t test_vabsq_f64(float64x2_t a) {
return vabsq_f64(a);
}
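
Note that the floating-point vabs checks above now bitcast through an integer vector (<2 x float> to <2 x i32> to <8 x i8> and back) before the llvm.fabs call; lane semantics are unchanged, as this illustrative snippet (not part of the patch; assumes an AArch64 clang) shows:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  float32x2_t v = {-1.5f, 2.5f};
  float32x2_t r = vabs_f32(v); /* per-lane fabs */
  printf("%g %g\n", vget_lane_f32(r, 0), vget_lane_f32(r, 1)); /* 1.5 2.5 */
  return 0;
}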
-// CHECK-LABEL: @test_vuqadd_s8(
-// CHECK: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VUQADD_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuqadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VUQADD_I]]
+//
int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) {
return vuqadd_s8(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s8(
-// CHECK: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VUQADD_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuqaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VUQADD_I]]
+//
int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) {
return vuqaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i16> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuqadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[VUQADD_I]], <4 x i16> [[VUQADD1_I]])
+// CHECK-NEXT: ret <4 x i16> [[VUQADD2_I]]
+//
int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) {
return vuqadd_s16(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i16> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuqaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> [[VUQADD_I]], <8 x i16> [[VUQADD1_I]])
+// CHECK-NEXT: ret <8 x i16> [[VUQADD2_I]]
+//
int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) {
return vuqaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vuqadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i32> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuqadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> [[VUQADD_I]], <2 x i32> [[VUQADD1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VUQADD2_I]]
+//
int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) {
return vuqadd_s32(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: ret <4 x i32> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuqaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> [[VUQADD_I]], <4 x i32> [[VUQADD1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VUQADD2_I]]
+//
int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) {
return vuqaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vuqaddq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: ret <2 x i64> [[VUQADD2_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuqaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> [[VUQADD_I]], <2 x i64> [[VUQADD1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VUQADD2_I]]
+//
int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) {
return vuqaddq_s64(a, b);
}
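
A hedged usage sketch for the suqadd checks above (not part of the patch; assumes an AArch64 clang). Per the ACLE signature, vuqadd_s8 adds an unsigned second operand to a signed first operand with signed saturation, so the second argument below is declared uint8x8_t even though the older tests above pass a signed vector:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int8x8_t  a = {120, -5, 0, 0, 0, 0, 0, 0};
  uint8x8_t b = {10,   3, 0, 0, 0, 0, 0, 0};
  int8x8_t  r = vuqadd_s8(a, b); /* 120+10 saturates to 127 */
  printf("%d %d\n", vget_lane_s8(r, 0), vget_lane_s8(r, 1)); /* 127 -2 */
  return 0;
}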
-// CHECK-LABEL: @test_vcls_s8(
-// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCLS_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcls_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCLS_V_I]]
+//
int8x8_t test_vcls_s8(int8x8_t a) {
return vcls_s8(a);
}
-// CHECK-LABEL: @test_vcls_u8(
-// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCLS_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcls_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCLS_V_I]]
+//
int8x8_t test_vcls_u8(uint8x8_t a) {
return vcls_u8(a);
}
-// CHECK-LABEL: @test_vclsq_s8(
-// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vclsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCLSQ_V_I]]
+//
int8x16_t test_vclsq_s8(int8x16_t a) {
return vclsq_s8(a);
}
-// CHECK-LABEL: @test_vclsq_u8(
-// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vclsq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCLSQ_V_I]]
+//
int8x16_t test_vclsq_u8(uint8x16_t a) {
return vclsq_u8(a);
}
-// CHECK-LABEL: @test_vcls_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLS_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcls_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vcls_s16(int16x4_t a) {
return vcls_s16(a);
}
-// CHECK-LABEL: @test_vcls_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLS_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vcls_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vcls_u16(uint16x4_t a) {
return vcls_u16(a);
}
-// CHECK-LABEL: @test_vclsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vclsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclsq_s16(int16x8_t a) {
return vclsq_s16(a);
}
-// CHECK-LABEL: @test_vclsq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vclsq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclsq_u16(uint16x8_t a) {
return vclsq_u16(a);
}
-// CHECK-LABEL: @test_vcls_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLS_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcls_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vcls_s32(int32x2_t a) {
return vcls_s32(a);
}
-// CHECK-LABEL: @test_vcls_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLS_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcls_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vcls_u32(uint32x2_t a) {
return vcls_u32(a);
}
-// CHECK-LABEL: @test_vclsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclsq_s32(int32x4_t a) {
return vclsq_s32(a);
}
-// CHECK-LABEL: @test_vclsq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclsq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclsq_u32(uint32x4_t a) {
return vclsq_u32(a);
}
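
For the vcls checks above, a short standalone example (not part of the patch; assumes AArch64): cls counts how many bits directly below the sign bit match it, so 0 and -1 both report 7 for an 8-bit lane:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int8x8_t v = {0, -1, 1, 0x40, -2, 0x20, 0x7F, -128};
  int8x8_t r = vcls_s8(v); /* leading sign bits, excluding the sign bit itself */
  int8_t out[8];
  vst1_s8(out, r);
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]); /* 7 7 6 0 6 1 0 0 */
  printf("\n");
  return 0;
}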
-// CHECK-LABEL: @test_vclz_s8(
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vclz_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
int8x8_t test_vclz_s8(int8x8_t a) {
return vclz_s8(a);
}
-// CHECK-LABEL: @test_vclzq_s8(
-// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
-// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vclzq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <16 x i8> [[VCLZQ_V_I]]
+//
int8x16_t test_vclzq_s8(int8x16_t a) {
return vclzq_s8(a);
}
-// CHECK-LABEL: @test_vclz_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vclz_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vclz_s16(int16x4_t a) {
return vclz_s16(a);
}
-// CHECK-LABEL: @test_vclzq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vclzq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclzq_s16(int16x8_t a) {
return vclzq_s16(a);
}
-// CHECK-LABEL: @test_vclz_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclz_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vclz_s32(int32x2_t a) {
return vclz_s32(a);
}
-// CHECK-LABEL: @test_vclzq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclzq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclzq_s32(int32x4_t a) {
return vclzq_s32(a);
}
-// CHECK-LABEL: @test_vclz_u8(
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vclz_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
uint8x8_t test_vclz_u8(uint8x8_t a) {
return vclz_u8(a);
}
-// CHECK-LABEL: @test_vclzq_u8(
-// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
-// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vclzq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <16 x i8> [[VCLZQ_V_I]]
+//
uint8x16_t test_vclzq_u8(uint8x16_t a) {
return vclzq_u8(a);
}
-// CHECK-LABEL: @test_vclz_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vclz_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vclz_u16(uint16x4_t a) {
return vclz_u16(a);
}
-// CHECK-LABEL: @test_vclzq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vclzq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
uint16x8_t test_vclzq_u16(uint16x8_t a) {
return vclzq_u16(a);
}
-// CHECK-LABEL: @test_vclz_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vclz_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vclz_u32(uint32x2_t a) {
return vclz_u32(a);
}
-// CHECK-LABEL: @test_vclzq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vclzq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
uint32x4_t test_vclzq_u32(uint32x4_t a) {
return vclzq_u32(a);
}
-// CHECK-LABEL: @test_vcnt_s8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcnt_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
int8x8_t test_vcnt_s8(int8x8_t a) {
return vcnt_s8(a);
}
-// CHECK-LABEL: @test_vcntq_s8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcntq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
int8x16_t test_vcntq_s8(int8x16_t a) {
return vcntq_s8(a);
}
-// CHECK-LABEL: @test_vcnt_u8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcnt_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
uint8x8_t test_vcnt_u8(uint8x8_t a) {
return vcnt_u8(a);
}
-// CHECK-LABEL: @test_vcntq_u8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcntq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
uint8x16_t test_vcntq_u8(uint8x16_t a) {
return vcntq_u8(a);
}
-// CHECK-LABEL: @test_vcnt_p8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vcnt_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
poly8x8_t test_vcnt_p8(poly8x8_t a) {
return vcnt_p8(a);
}
-// CHECK-LABEL: @test_vcntq_p8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vcntq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
poly8x16_t test_vcntq_p8(poly8x16_t a) {
return vcntq_p8(a);
}
-// CHECK-LABEL: @test_vmvn_s8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmvn_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
int8x8_t test_vmvn_s8(int8x8_t a) {
return vmvn_s8(a);
}
-// CHECK-LABEL: @test_vmvnq_s8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmvnq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
int8x16_t test_vmvnq_s8(int8x16_t a) {
return vmvnq_s8(a);
}
-// CHECK-LABEL: @test_vmvn_s16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
-// CHECK: ret <4 x i16> [[NEG_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmvn_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <4 x i16> [[NOT_I]]
+//
int16x4_t test_vmvn_s16(int16x4_t a) {
return vmvn_s16(a);
}
-// CHECK-LABEL: @test_vmvnq_s16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
-// CHECK: ret <8 x i16> [[NEG_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmvnq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <8 x i16> [[NOT_I]]
+//
int16x8_t test_vmvnq_s16(int16x8_t a) {
return vmvnq_s16(a);
}
-// CHECK-LABEL: @test_vmvn_s32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
-// CHECK: ret <2 x i32> [[NEG_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmvn_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <2 x i32> [[NOT_I]]
+//
int32x2_t test_vmvn_s32(int32x2_t a) {
return vmvn_s32(a);
}
-// CHECK-LABEL: @test_vmvnq_s32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
-// CHECK: ret <4 x i32> [[NEG_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmvnq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <4 x i32> [[NOT_I]]
+//
int32x4_t test_vmvnq_s32(int32x4_t a) {
return vmvnq_s32(a);
}
-// CHECK-LABEL: @test_vmvn_u8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmvn_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
uint8x8_t test_vmvn_u8(uint8x8_t a) {
return vmvn_u8(a);
}
-// CHECK-LABEL: @test_vmvnq_u8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmvnq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
return vmvnq_u8(a);
}
-// CHECK-LABEL: @test_vmvn_u16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
-// CHECK: ret <4 x i16> [[NEG_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmvn_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <4 x i16> [[NOT_I]]
+//
uint16x4_t test_vmvn_u16(uint16x4_t a) {
return vmvn_u16(a);
}
-// CHECK-LABEL: @test_vmvnq_u16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
-// CHECK: ret <8 x i16> [[NEG_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmvnq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <8 x i16> [[NOT_I]]
+//
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
return vmvnq_u16(a);
}
-// CHECK-LABEL: @test_vmvn_u32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
-// CHECK: ret <2 x i32> [[NEG_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmvn_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <2 x i32> [[NOT_I]]
+//
uint32x2_t test_vmvn_u32(uint32x2_t a) {
return vmvn_u32(a);
}
-// CHECK-LABEL: @test_vmvnq_u32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
-// CHECK: ret <4 x i32> [[NEG_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmvnq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <4 x i32> [[NOT_I]]
+//
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
return vmvnq_u32(a);
}
-// CHECK-LABEL: @test_vmvn_p8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmvn_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
poly8x8_t test_vmvn_p8(poly8x8_t a) {
return vmvn_p8(a);
}
-// CHECK-LABEL: @test_vmvnq_p8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmvnq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
return vmvnq_p8(a);
}
-// CHECK-LABEL: @test_vrbit_s8(
-// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrbit_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VRBIT_I]]
+//
int8x8_t test_vrbit_s8(int8x8_t a) {
return vrbit_s8(a);
}
-// CHECK-LABEL: @test_vrbitq_s8(
-// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrbitq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VRBIT_I]]
+//
int8x16_t test_vrbitq_s8(int8x16_t a) {
return vrbitq_s8(a);
}
-// CHECK-LABEL: @test_vrbit_u8(
-// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrbit_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VRBIT_I]]
+//
uint8x8_t test_vrbit_u8(uint8x8_t a) {
return vrbit_u8(a);
}
-// CHECK-LABEL: @test_vrbitq_u8(
-// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrbitq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VRBIT_I]]
+//
uint8x16_t test_vrbitq_u8(uint8x16_t a) {
return vrbitq_u8(a);
}
-// CHECK-LABEL: @test_vrbit_p8(
-// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vrbit_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VRBIT_I]]
+//
poly8x8_t test_vrbit_p8(poly8x8_t a) {
return vrbit_p8(a);
}
-// CHECK-LABEL: @test_vrbitq_p8(
-// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VRBIT_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vrbitq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VRBIT_I]]
+//
poly8x16_t test_vrbitq_p8(poly8x16_t a) {
return vrbitq_p8(a);
}
-// CHECK-LABEL: @test_vmovn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmovn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VMOVN_I]]
+//
int8x8_t test_vmovn_s16(int16x8_t a) {
return vmovn_s16(a);
}
-// CHECK-LABEL: @test_vmovn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmovn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VMOVN_I]]
+//
int16x4_t test_vmovn_s32(int32x4_t a) {
return vmovn_s32(a);
}
-// CHECK-LABEL: @test_vmovn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmovn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VMOVN_I]]
+//
int32x2_t test_vmovn_s64(int64x2_t a) {
return vmovn_s64(a);
}
-// CHECK-LABEL: @test_vmovn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vmovn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VMOVN_I]]
+//
uint8x8_t test_vmovn_u16(uint16x8_t a) {
return vmovn_u16(a);
}
-// CHECK-LABEL: @test_vmovn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vmovn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VMOVN_I]]
+//
uint16x4_t test_vmovn_u32(uint32x4_t a) {
return vmovn_u32(a);
}
-// CHECK-LABEL: @test_vmovn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VMOVN_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vmovn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VMOVN_I]]
+//
uint32x2_t test_vmovn_u64(uint64x2_t a) {
return vmovn_u64(a);
}
-// CHECK-LABEL: @test_vmovn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmovn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vmovn_high_s16(int8x8_t a, int16x8_t b) {
return vmovn_high_s16(a, b);
}
-// CHECK-LABEL: @test_vmovn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vmovn_high_s32(int16x4_t a, int32x4_t b) {
return vmovn_high_s32(a, b);
}
-// CHECK-LABEL: @test_vmovn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vmovn_high_s64(int32x2_t a, int64x2_t b) {
return vmovn_high_s64(a, b);
}
-// CHECK-LABEL: @test_vmovn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <8 x i16> %b to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vmovn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vmovn_high_u16(int8x8_t a, int16x8_t b) {
return vmovn_high_u16(a, b);
}
-// CHECK-LABEL: @test_vmovn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <4 x i32> %b to <4 x i16>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vmovn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vmovn_high_u32(int16x4_t a, int32x4_t b) {
return vmovn_high_u32(a, b);
}
-// CHECK-LABEL: @test_vmovn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VMOVN_I_I:%.*]] = trunc <2 x i64> %b to <2 x i32>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmovn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) {
return vmovn_high_u64(a, b);
}
-// CHECK-LABEL: @test_vqmovun_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqmovun_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVUN_V1_I]]
+//
int8x8_t test_vqmovun_s16(int16x8_t a) {
return vqmovun_s16(a);
}
-// CHECK-LABEL: @test_vqmovun_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqmovun_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqmovun_s32(int32x4_t a) {
return vqmovun_s32(a);
}
-// CHECK-LABEL: @test_vqmovun_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqmovun_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqmovun_s64(int64x2_t a) {
return vqmovun_s64(a);
}
-// CHECK-LABEL: @test_vqmovun_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqmovun_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vqmovun_high_s16(uint8x8_t a, int16x8_t b) {
return vqmovun_high_s16(a, b);
}
-// CHECK-LABEL: @test_vqmovun_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %b)
-// CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVUN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqmovun_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vqmovun_high_s32(uint16x4_t a, int32x4_t b) {
return vqmovun_high_s32(a, b);
}
-// CHECK-LABEL: @test_vqmovun_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %b)
-// CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVUN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqmovun_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vqmovun_high_s64(uint32x2_t a, int64x2_t b) {
return vqmovun_high_s64(a, b);
}
-// CHECK-LABEL: @test_vqmovn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqmovn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVN_V1_I]]
+//
int8x8_t test_vqmovn_s16(int16x8_t a) {
return vqmovn_s16(a);
}
-// CHECK-LABEL: @test_vqmovn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqmovn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqmovn_s32(int32x4_t a) {
return vqmovn_s32(a);
}
-// CHECK-LABEL: @test_vqmovn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqmovn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqmovn_s64(int64x2_t a) {
return vqmovn_s64(a);
}
-// CHECK-LABEL: @test_vqmovn_high_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqmovn_high_s16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) {
return vqmovn_high_s16(a, b);
}
-// CHECK-LABEL: @test_vqmovn_high_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %b)
-// CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqmovn_high_s32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) {
return vqmovn_high_s32(a, b);
}
-// CHECK-LABEL: @test_vqmovn_high_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %b)
-// CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqmovn_high_s64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) {
return vqmovn_high_s64(a, b);
}
-// CHECK-LABEL: @test_vqmovn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vqmovn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVN_V1_I]]
+//
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
return vqmovn_u16(a);
}
-// CHECK-LABEL: @test_vqmovn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vqmovn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
return vqmovn_u32(a);
}
-// CHECK-LABEL: @test_vqmovn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vqmovn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
return vqmovn_u64(a);
}
-// CHECK-LABEL: @test_vqmovn_high_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vqmovn_high_u16(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) {
return vqmovn_high_u16(a, b);
}
-// CHECK-LABEL: @test_vqmovn_high_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %b)
-// CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vqmovn_high_u32(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) {
return vqmovn_high_u32(a, b);
}
-// CHECK-LABEL: @test_vqmovn_high_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %b)
-// CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vqmovn_high_u64(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) {
return vqmovn_high_u64(a, b);
}
-// CHECK-LABEL: @test_vshll_n_s8(
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
int16x8_t test_vshll_n_s8(int8x8_t a) {
return vshll_n_s8(a, 8);
}
-// CHECK-LABEL: @test_vshll_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
int32x4_t test_vshll_n_s16(int16x4_t a) {
return vshll_n_s16(a, 16);
}
-// CHECK-LABEL: @test_vshll_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
int64x2_t test_vshll_n_s32(int32x2_t a) {
return vshll_n_s32(a, 32);
}
-// CHECK-LABEL: @test_vshll_n_u8(
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
return vshll_n_u8(a, 8);
}
-// CHECK-LABEL: @test_vshll_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
return vshll_n_u16(a, 16);
}
-// CHECK-LABEL: @test_vshll_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
return vshll_n_u32(a, 32);
}
-// CHECK-LABEL: @test_vshll_high_n_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
return vshll_high_n_s8(a, 8);
}
-// CHECK-LABEL: @test_vshll_high_n_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
return vshll_high_n_s16(a, 16);
}
-// CHECK-LABEL: @test_vshll_high_n_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
return vshll_high_n_s32(a, 32);
}
-// CHECK-LABEL: @test_vshll_high_n_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
return vshll_high_n_u8(a, 8);
}
-// CHECK-LABEL: @test_vshll_high_n_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
return vshll_high_n_u16(a, 16);
}
-// CHECK-LABEL: @test_vshll_high_n_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
return vshll_high_n_u32(a, 32);
}
-// CHECK-LABEL: @test_vcvt_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
-// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
-// CHECK: ret <4 x half> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcvt_f16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
+// CHECK-NEXT: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
return vcvt_f16_f32(a);
}
-// CHECK-LABEL: @test_vcvt_high_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %b)
-// CHECK: [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x half>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x half> %a, <4 x half> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x half> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcvt_high_f16_f32(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]])
+// CHECK-NEXT: [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
+//
float16x8_t test_vcvt_high_f16_f32(float16x4_t a, float32x4_t b) {
return vcvt_high_f16_f32(a, b);
}
-// CHECK-LABEL: @test_vcvt_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = fptrunc <2 x double> %a to <2 x float>
-// CHECK: ret <2 x float> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_f32_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+//
float32x2_t test_vcvt_f32_f64(float64x2_t a) {
return vcvt_f32_f64(a);
}
-// CHECK-LABEL: @test_vcvt_high_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VCVT_I_I:%.*]] = fptrunc <2 x double> %b to <2 x float>
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVT_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x float> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvt_high_f32_f64(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVT_I_I:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[VCVT_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) {
return vcvt_high_f32_f64(a, b);
}
-// CHECK-LABEL: @test_vcvtx_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a)
-// CHECK: ret <2 x float> [[VCVTX_F32_V1_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvtx_f32_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTX_F32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VCVTX_F32_V1_I]]
+//
float32x2_t test_vcvtx_f32_f64(float64x2_t a) {
return vcvtx_f32_f64(a);
}
-// CHECK-LABEL: @test_vcvtx_high_f32_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b)
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x float> [[SHUFFLE_I_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvtx_high_f32_f64(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTX_F32_V_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I_I]])
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) {
return vcvtx_high_f32_f64(a, b);
}
-// CHECK-LABEL: @test_vcvt_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VCVT_F32_F161_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvt_f32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
+// CHECK-NEXT: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
return vcvt_f32_f16(a);
}
-// CHECK-LABEL: @test_vcvt_high_f32_f16(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]])
-// CHECK: [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VCVT_F32_F161_I_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvt_high_f32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]])
+// CHECK-NEXT: [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {
return vcvt_high_f32_f16(a);
}
-// CHECK-LABEL: @test_vcvt_f64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = fpext <2 x float> %a to <2 x double>
-// CHECK: ret <2 x double> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvt_f64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[VCVT_I]]
+//
float64x2_t test_vcvt_f64_f32(float32x2_t a) {
return vcvt_f64_f32(a);
}
-// CHECK-LABEL: @test_vcvt_high_f64_f32(
-// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[SHUFFLE_I_I]] to <8 x i8>
-// CHECK: [[VCVT_I_I:%.*]] = fpext <2 x float> [[SHUFFLE_I_I]] to <2 x double>
-// CHECK: ret <2 x double> [[VCVT_I_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvt_high_f64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[SHUFFLE_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_I_I:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[VCVT_I_I]]
+//
float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
return vcvt_high_f64_f32(a);
}
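The "high" conversions checked above combine a lane extraction with the widening convert: a shufflevector of lanes 2,3 feeds the fpext. A sketch of the same result via the non-high intrinsics (helper name is illustrative):

#include <arm_neon.h>

float64x2_t cvt_high_by_hand(float32x4_t a) {
  // Same as vcvt_high_f64_f32(a): extract the top two lanes, then
  // widen them, matching the shuffle + fpext sequence in the checks.
  return vcvt_f64_f32(vget_high_f32(a));
}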
-// CHECK-LABEL: @test_vrndnq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDN1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndnq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> [[VRNDN_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDN1_I]]
+//
float64x2_t test_vrndnq_f64(float64x2_t a) {
return vrndnq_f64(a);
}
-// CHECK-LABEL: @test_vrndaq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDA1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndaq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDA1_I]]
+//
float64x2_t test_vrndaq_f64(float64x2_t a) {
return vrndaq_f64(a);
}
-// CHECK-LABEL: @test_vrndpq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDP1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndpq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDP1_I]]
+//
float64x2_t test_vrndpq_f64(float64x2_t a) {
return vrndpq_f64(a);
}
-// CHECK-LABEL: @test_vrndmq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDM1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndmq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDM1_I]]
+//
float64x2_t test_vrndmq_f64(float64x2_t a) {
return vrndmq_f64(a);
}
-// CHECK-LABEL: @test_vrndxq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDX1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndxq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDX1_I]]
+//
float64x2_t test_vrndxq_f64(float64x2_t a) {
return vrndxq_f64(a);
}
-// CHECK-LABEL: @test_vrndq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDZ1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDZ1_I]]
+//
float64x2_t test_vrndq_f64(float64x2_t a) {
return vrndq_f64(a);
}
-// CHECK-LABEL: @test_vrndiq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRNDI1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrndiq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDIQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDIQ_V1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[VRNDIQ_V_I]])
+// CHECK-NEXT: ret <2 x double> [[VRNDIQ_V1_I]]
+//
float64x2_t test_vrndiq_f64(float64x2_t a) {
return vrndiq_f64(a);
}
-// CHECK-LABEL: @test_vcvt_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> [[VCVTZ_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTZ1_I]]
+//
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
return vcvt_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> [[VCVTZ_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTZ1_I]]
+//
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
return vcvtq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> [[VCVTZ_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTZ1_I]]
+//
int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
return vcvtq_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvt_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> [[VCVTZ_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTZ1_I]]
+//
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
return vcvt_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[TMP1]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> [[VCVTZ_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTZ1_I]]
+//
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
return vcvtq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[TMP1]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> [[VCVTZ_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTZ1_I]]
+//
uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
return vcvtq_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtn_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtn_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> [[VCVTN_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTN1_I]]
+//
int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
return vcvtn_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtnq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtnq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> [[VCVTN_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTN1_I]]
+//
int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
return vcvtnq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtnq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtnq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> [[VCVTN_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTN1_I]]
+//
int64x2_t test_vcvtnq_s64_f64(float64x2_t a) {
return vcvtnq_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtn_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtn_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTN1_I]]
+//
uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
return vcvtn_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtnq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtnq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> [[VCVTN_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTN1_I]]
+//
uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
return vcvtnq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtnq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTN1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtnq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> [[VCVTN_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTN1_I]]
+//
uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) {
return vcvtnq_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtp_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtp_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> [[VCVTP_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTP1_I]]
+//
int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
return vcvtp_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtpq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtpq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> [[VCVTP_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTP1_I]]
+//
int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
return vcvtpq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtpq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtpq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> [[VCVTP_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTP1_I]]
+//
int64x2_t test_vcvtpq_s64_f64(float64x2_t a) {
return vcvtpq_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtp_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtp_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTP1_I]]
+//
uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
return vcvtp_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtpq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtpq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> [[VCVTP_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTP1_I]]
+//
uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
return vcvtpq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtpq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTP1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtpq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> [[VCVTP_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTP1_I]]
+//
uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) {
return vcvtpq_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvtm_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtm_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> [[VCVTM_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTM1_I]]
+//
int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
return vcvtm_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtmq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtmq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> [[VCVTM_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTM1_I]]
+//
int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
return vcvtmq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtmq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtmq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> [[VCVTM_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTM1_I]]
+//
int64x2_t test_vcvtmq_s64_f64(float64x2_t a) {
return vcvtmq_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvtm_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtm_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTM1_I]]
+//
uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
return vcvtm_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtmq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtmq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> [[VCVTM_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTM1_I]]
+//
uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
return vcvtmq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtmq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTM1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtmq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> [[VCVTM_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTM1_I]]
+//
uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) {
return vcvtmq_u64_f64(a);
}
-// CHECK-LABEL: @test_vcvta_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvta_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> [[VCVTA_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTA1_I]]
+//
int32x2_t test_vcvta_s32_f32(float32x2_t a) {
return vcvta_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtaq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtaq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> [[VCVTA_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTA1_I]]
+//
int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
return vcvtaq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtaq_s64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtaq_s64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> [[VCVTA_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTA1_I]]
+//
int64x2_t test_vcvtaq_s64_f64(float64x2_t a) {
return vcvtaq_s64_f64(a);
}
-// CHECK-LABEL: @test_vcvta_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a)
-// CHECK: ret <2 x i32> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vcvta_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> [[VCVTA_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCVTA1_I]]
+//
uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
return vcvta_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtaq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a)
-// CHECK: ret <4 x i32> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtaq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> [[VCVTA_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCVTA1_I]]
+//
uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
return vcvtaq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtaq_u64_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a)
-// CHECK: ret <2 x i64> [[VCVTA1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtaq_u64_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> [[VCVTA_I]])
+// CHECK-NEXT: ret <2 x i64> [[VCVTA1_I]]
+//
uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) {
return vcvtaq_u64_f64(a);
}
-// CHECK-LABEL: @test_vrsqrte_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrsqrte_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VRSQRTE_V1_I]]
+//
float32x2_t test_vrsqrte_f32(float32x2_t a) {
return vrsqrte_f32(a);
}
-// CHECK-LABEL: @test_vrsqrteq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrsqrteq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]])
+// CHECK-NEXT: ret <4 x float> [[VRSQRTEQ_V1_I]]
+//
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
return vrsqrteq_f32(a);
}
-// CHECK-LABEL: @test_vrsqrteq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRSQRTEQ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrsqrteq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> [[VRSQRTEQ_V_I]])
+// CHECK-NEXT: ret <2 x double> [[VRSQRTEQ_V1_I]]
+//
float64x2_t test_vrsqrteq_f64(float64x2_t a) {
return vrsqrteq_f64(a);
}
-// CHECK-LABEL: @test_vrecpe_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRECPE_V1_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrecpe_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> [[VRECPE_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VRECPE_V1_I]]
+//
float32x2_t test_vrecpe_f32(float32x2_t a) {
return vrecpe_f32(a);
}
-// CHECK-LABEL: @test_vrecpeq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrecpeq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> [[VRECPEQ_V_I]])
+// CHECK-NEXT: ret <4 x float> [[VRECPEQ_V1_I]]
+//
float32x4_t test_vrecpeq_f32(float32x4_t a) {
return vrecpeq_f32(a);
}
-// CHECK-LABEL: @test_vrecpeq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VRECPEQ_V1_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrecpeq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> [[VRECPEQ_V_I]])
+// CHECK-NEXT: ret <2 x double> [[VRECPEQ_V1_I]]
+//
float64x2_t test_vrecpeq_f64(float64x2_t a) {
return vrecpeq_f64(a);
}
-// CHECK-LABEL: @test_vrecpe_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vrecpe_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> [[VRECPE_V_I]])
+// CHECK-NEXT: ret <2 x i32> [[VRECPE_V1_I]]
+//
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
return vrecpe_u32(a);
}
-// CHECK-LABEL: @test_vrecpeq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vrecpeq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]])
+// CHECK-NEXT: ret <4 x i32> [[VRECPEQ_V1_I]]
+//
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
return vrecpeq_u32(a);
}
-// CHECK-LABEL: @test_vsqrt_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VSQRT_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vsqrt_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP2]])
+// CHECK-NEXT: ret <2 x float> [[VSQRT_I]]
+//
float32x2_t test_vsqrt_f32(float32x2_t a) {
return vsqrt_f32(a);
}
-// CHECK-LABEL: @test_vsqrtq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VSQRT_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vsqrtq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
+// CHECK-NEXT: ret <4 x float> [[VSQRT_I]]
+//
float32x4_t test_vsqrtq_f32(float32x4_t a) {
return vsqrtq_f32(a);
}
-// CHECK-LABEL: @test_vsqrtq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[VSQRT_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vsqrtq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
+// CHECK-NEXT: ret <2 x double> [[VSQRT_I]]
+//
float64x2_t test_vsqrtq_f64(float64x2_t a) {
return vsqrtq_f64(a);
}
-// CHECK-LABEL: @test_vcvt_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+//
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
return vcvt_f32_s32(a);
}
-// CHECK-LABEL: @test_vcvt_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+//
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
return vcvt_f32_u32(a);
}
-// CHECK-LABEL: @test_vcvtq_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvtq_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[VCVT_I]]
+//
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
return vcvtq_f32_s32(a);
}
-// CHECK-LABEL: @test_vcvtq_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcvtq_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[VCVT_I]]
+//
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
return vcvtq_f32_u32(a);
}
-// CHECK-LABEL: @test_vcvtq_f64_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i64> %a to <2 x double>
-// CHECK: ret <2 x double> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvtq_f64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[VCVT_I]]
+//
float64x2_t test_vcvtq_f64_s64(int64x2_t a) {
return vcvtq_f64_s64(a);
}
-// CHECK-LABEL: @test_vcvtq_f64_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i64> %a to <2 x double>
-// CHECK: ret <2 x double> [[VCVT_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcvtq_f64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[VCVT_I]]
+//
float64x2_t test_vcvtq_f64_u64(uint64x2_t a) {
return vcvtq_f64_u64(a);
}
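All of the updated conversion checks above share one shape: the vector operand is bitcast to the equally sized integer vector, then to <16 x i8> (or <8 x i8> for 64-bit vectors), and cast back to the operand type immediately before the intrinsic call, so the reinterpretation is a chain of no-op bitcasts that later passes can fold. A self-contained test in the same style (assumptions: the RUN line copies the one these files use, and the value names are the kind update_cc_test_checks.py would emit, not hand-written):

// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s

#include <arm_neon.h>

// CHECK-LABEL: define dso_local <4 x i32> @fcvtzs_example(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK:   [[V:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> [[V]])
int32x4_t fcvtzs_example(float32x4_t a) {
  return vcvtq_s32_f32(a);
}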
diff --git a/clang/test/CodeGen/AArch64/neon-perm.c b/clang/test/CodeGen/AArch64/neon-perm.c
index 1ffbbd5d9bc42..61b24c55a39a3 100644
--- a/clang/test/CodeGen/AArch64/neon-perm.c
+++ b/clang/test/CodeGen/AArch64/neon-perm.c
@@ -1,1932 +1,2395 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vuzp1_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp1_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
return vuzp1_s8(a, b);
}
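The unzip tests in this file all lower to a single shufflevector over the concatenated operands; vuzp1 selects the even-indexed lanes. A sketch of the same selection written out by hand (the helper name and the use of __builtin_shufflevector are illustrative):

#include <arm_neon.h>

int8x8_t uzp1_by_hand(int8x8_t a, int8x8_t b) {
  // Same lanes as vuzp1_s8(a, b): indices 0,2,4,6 of a then 0,2,4,6 of b,
  // i.e. the <i32 0, 2, 4, 6, 8, 10, 12, 14> mask in the checks above.
  return __builtin_shufflevector(a, b, 0, 2, 4, 6, 8, 10, 12, 14);
}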
-// CHECK-LABEL: @test_vuzp1q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp1q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
return vuzp1q_s8(a, b);
}
-// CHECK-LABEL: @test_vuzp1_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp1_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
return vuzp1_s16(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp1q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
return vuzp1q_s16(a, b);
}
-// CHECK-LABEL: @test_vuzp1_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuzp1_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
return vuzp1_s32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuzp1q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
return vuzp1q_s32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp1q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
return vuzp1q_s64(a, b);
}
-// CHECK-LABEL: @test_vuzp1_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp1_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
return vuzp1_u8(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp1q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
return vuzp1q_u8(a, b);
}
-// CHECK-LABEL: @test_vuzp1_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp1_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
return vuzp1_u16(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp1q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
return vuzp1q_u16(a, b);
}
-// CHECK-LABEL: @test_vuzp1_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuzp1_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
return vuzp1_u32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuzp1q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
return vuzp1q_u32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp1q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
return vuzp1q_u64(a, b);
}
-// CHECK-LABEL: @test_vuzp1_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vuzp1_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
return vuzp1_f32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vuzp1q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
return vuzp1q_f32(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vuzp1q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
return vuzp1q_f64(a, b);
}
-// CHECK-LABEL: @test_vuzp1_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp1_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
return vuzp1_p8(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp1q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
return vuzp1q_p8(a, b);
}
-// CHECK-LABEL: @test_vuzp1_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp1_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
return vuzp1_p16(a, b);
}
-// CHECK-LABEL: @test_vuzp1q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp1q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
return vuzp1q_p16(a, b);
}
-// CHECK-LABEL: @test_vuzp2_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp2_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
return vuzp2_s8(a, b);
}
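UZP2 is the odd-lane counterpart: the regenerated masks below all select lanes 1, 3, 5, ... of each operand. Scalar sketch under the same caveats as before (uzp2_ref_s8 is an illustrative name):

#include <stdint.h>
/* Scalar model of the <1,3,5,7,9,11,13,15> shuffle checked above. */
static void uzp2_ref_s8(const int8_t a[8], const int8_t b[8], int8_t out[8]) {
  for (int i = 0; i < 4; ++i) {
    out[i]     = a[2 * i + 1]; /* a1,a3,a5,a7 -> low half  */
    out[i + 4] = b[2 * i + 1]; /* b1,b3,b5,b7 -> high half */
  }
}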
-// CHECK-LABEL: @test_vuzp2q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp2q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
return vuzp2q_s8(a, b);
}
-// CHECK-LABEL: @test_vuzp2_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp2_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
return vuzp2_s16(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp2q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
return vuzp2q_s16(a, b);
}
-// CHECK-LABEL: @test_vuzp2_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuzp2_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
return vuzp2_s32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuzp2q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
return vuzp2q_s32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp2q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
return vuzp2q_s64(a, b);
}
-// CHECK-LABEL: @test_vuzp2_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp2_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
return vuzp2_u8(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp2q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
return vuzp2q_u8(a, b);
}
-// CHECK-LABEL: @test_vuzp2_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp2_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
return vuzp2_u16(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp2q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
return vuzp2q_u16(a, b);
}
-// CHECK-LABEL: @test_vuzp2_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vuzp2_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
return vuzp2_u32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vuzp2q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
return vuzp2q_u32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp2q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
return vuzp2q_u64(a, b);
}
-// CHECK-LABEL: @test_vuzp2_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vuzp2_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
return vuzp2_f32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vuzp2q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
return vuzp2q_f32(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vuzp2q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
return vuzp2q_f64(a, b);
}
-// CHECK-LABEL: @test_vuzp2_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp2_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
return vuzp2_p8(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp2q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
return vuzp2q_p8(a, b);
}
-// CHECK-LABEL: @test_vuzp2_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vuzp2_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
return vuzp2_p16(a, b);
}
-// CHECK-LABEL: @test_vuzp2q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vuzp2q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
return vuzp2q_p16(a, b);
}
-// CHECK-LABEL: @test_vzip1_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip1_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
return vzip1_s8(a, b);
}
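ZIP1 interleaves the low halves of the two operands, which is what the <0,8,1,9,...> masks below encode. Illustrative scalar model (zip1_ref_s8 is a made-up name, not part of the patch):

#include <stdint.h>
/* Scalar model of the <0,8,1,9,2,10,3,11> shuffle checked above. */
static void zip1_ref_s8(const int8_t a[8], const int8_t b[8], int8_t out[8]) {
  for (int i = 0; i < 4; ++i) {
    out[2 * i]     = a[i]; /* low half of a */
    out[2 * i + 1] = b[i]; /* low half of b */
  }
}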
-// CHECK-LABEL: @test_vzip1q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip1q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
return vzip1q_s8(a, b);
}
-// CHECK-LABEL: @test_vzip1_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip1_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
return vzip1_s16(a, b);
}
-// CHECK-LABEL: @test_vzip1q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip1q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
return vzip1q_s16(a, b);
}
-// CHECK-LABEL: @test_vzip1_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vzip1_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
return vzip1_s32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vzip1q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
return vzip1q_s32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip1q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
return vzip1q_s64(a, b);
}
-// CHECK-LABEL: @test_vzip1_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip1_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
return vzip1_u8(a, b);
}
-// CHECK-LABEL: @test_vzip1q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip1q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
return vzip1q_u8(a, b);
}
-// CHECK-LABEL: @test_vzip1_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip1_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
return vzip1_u16(a, b);
}
-// CHECK-LABEL: @test_vzip1q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip1q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
return vzip1q_u16(a, b);
}
-// CHECK-LABEL: @test_vzip1_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vzip1_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
return vzip1_u32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vzip1q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
return vzip1q_u32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip1q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
return vzip1q_u64(a, b);
}
-// CHECK-LABEL: @test_vzip1_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vzip1_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
return vzip1_f32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vzip1q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
return vzip1q_f32(a, b);
}
-// CHECK-LABEL: @test_vzip1q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vzip1q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
return vzip1q_f64(a, b);
}
-// CHECK-LABEL: @test_vzip1_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip1_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
return vzip1_p8(a, b);
}
-// CHECK-LABEL: @test_vzip1q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip1q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
return vzip1q_p8(a, b);
}
-// CHECK-LABEL: @test_vzip1_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip1_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
return vzip1_p16(a, b);
}
-// CHECK-LABEL: @test_vzip1q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip1q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
return vzip1q_p16(a, b);
}
-// CHECK-LABEL: @test_vzip2_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip2_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
return vzip2_s8(a, b);
}
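ZIP2 performs the same interleave on the high halves (lanes 4..7 of an 8-lane vector). Scalar sketch, same caveats (zip2_ref_s8 is illustrative):

#include <stdint.h>
/* Scalar model of the <4,12,5,13,6,14,7,15> shuffle checked above. */
static void zip2_ref_s8(const int8_t a[8], const int8_t b[8], int8_t out[8]) {
  for (int i = 0; i < 4; ++i) {
    out[2 * i]     = a[i + 4]; /* high half of a */
    out[2 * i + 1] = b[i + 4]; /* high half of b */
  }
}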
-// CHECK-LABEL: @test_vzip2q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip2q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
return vzip2q_s8(a, b);
}
-// CHECK-LABEL: @test_vzip2_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip2_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
return vzip2_s16(a, b);
}
-// CHECK-LABEL: @test_vzip2q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip2q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
return vzip2q_s16(a, b);
}
-// CHECK-LABEL: @test_vzip2_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vzip2_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
return vzip2_s32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vzip2q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
return vzip2q_s32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip2q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
return vzip2q_s64(a, b);
}
-// CHECK-LABEL: @test_vzip2_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip2_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
return vzip2_u8(a, b);
}
-// CHECK-LABEL: @test_vzip2q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip2q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
return vzip2q_u8(a, b);
}
-// CHECK-LABEL: @test_vzip2_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip2_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
return vzip2_u16(a, b);
}
-// CHECK-LABEL: @test_vzip2q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip2q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
return vzip2q_u16(a, b);
}
-// CHECK-LABEL: @test_vzip2_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vzip2_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
return vzip2_u32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vzip2q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
return vzip2q_u32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip2q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
return vzip2q_u64(a, b);
}
-// CHECK-LABEL: @test_vzip2_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vzip2_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
return vzip2_f32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vzip2q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
return vzip2q_f32(a, b);
}
-// CHECK-LABEL: @test_vzip2q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vzip2q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
return vzip2q_f64(a, b);
}
-// CHECK-LABEL: @test_vzip2_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vzip2_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
return vzip2_p8(a, b);
}
-// CHECK-LABEL: @test_vzip2q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vzip2q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
return vzip2q_p8(a, b);
}
-// CHECK-LABEL: @test_vzip2_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vzip2_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
return vzip2_p16(a, b);
}
-// CHECK-LABEL: @test_vzip2q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vzip2q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
return vzip2q_p16(a, b);
}
-// CHECK-LABEL: @test_vtrn1_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn1_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
return vtrn1_s8(a, b);
}
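TRN1 transposes lane pairs: it keeps the even lanes of both operands, interleaved. Scalar sketch under the same assumptions (trn1_ref_s8 is a made-up helper):

#include <stdint.h>
/* Scalar model of the <0,8,2,10,4,12,6,14> shuffle checked above. */
static void trn1_ref_s8(const int8_t a[8], const int8_t b[8], int8_t out[8]) {
  for (int i = 0; i < 4; ++i) {
    out[2 * i]     = a[2 * i]; /* even lanes of a */
    out[2 * i + 1] = b[2 * i]; /* even lanes of b */
  }
}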
-// CHECK-LABEL: @test_vtrn1q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn1q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
return vtrn1q_s8(a, b);
}
-// CHECK-LABEL: @test_vtrn1_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn1_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
return vtrn1_s16(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn1q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
return vtrn1q_s16(a, b);
}
-// CHECK-LABEL: @test_vtrn1_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtrn1_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
return vtrn1_s32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtrn1q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
return vtrn1q_s32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn1q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
return vtrn1q_s64(a, b);
}
-// CHECK-LABEL: @test_vtrn1_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn1_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
return vtrn1_u8(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn1q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
return vtrn1q_u8(a, b);
}
-// CHECK-LABEL: @test_vtrn1_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn1_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
return vtrn1_u16(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn1q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
return vtrn1q_u16(a, b);
}
-// CHECK-LABEL: @test_vtrn1_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtrn1_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
return vtrn1_u32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtrn1q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
return vtrn1q_u32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn1q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
return vtrn1q_u64(a, b);
}
-// CHECK-LABEL: @test_vtrn1_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vtrn1_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
return vtrn1_f32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vtrn1q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
return vtrn1q_f32(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vtrn1q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
return vtrn1q_f64(a, b);
}
-// CHECK-LABEL: @test_vtrn1_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn1_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
return vtrn1_p8(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn1q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
return vtrn1q_p8(a, b);
}
-// CHECK-LABEL: @test_vtrn1_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn1_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
return vtrn1_p16(a, b);
}
-// CHECK-LABEL: @test_vtrn1q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn1q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
return vtrn1q_p16(a, b);
}
-// CHECK-LABEL: @test_vtrn2_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn2_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
return vtrn2_s8(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn2q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
return vtrn2q_s8(a, b);
}
-// CHECK-LABEL: @test_vtrn2_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn2_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
return vtrn2_s16(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn2q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
return vtrn2q_s16(a, b);
}
-// CHECK-LABEL: @test_vtrn2_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtrn2_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
return vtrn2_s32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtrn2q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
return vtrn2q_s32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn2q_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
return vtrn2q_s64(a, b);
}
-// CHECK-LABEL: @test_vtrn2_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn2_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
return vtrn2_u8(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn2q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
return vtrn2q_u8(a, b);
}
-// CHECK-LABEL: @test_vtrn2_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn2_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
return vtrn2_u16(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn2q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
return vtrn2q_u16(a, b);
}
-// CHECK-LABEL: @test_vtrn2_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vtrn2_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
return vtrn2_u32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vtrn2q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
return vtrn2q_u32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn2q_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
return vtrn2q_u64(a, b);
}
-// CHECK-LABEL: @test_vtrn2_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vtrn2_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
return vtrn2_f32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vtrn2q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
return vtrn2q_f32(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_f64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x double> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vtrn2q_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x double> [[SHUFFLE_I]]
+//
float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
return vtrn2q_f64(a, b);
}
-// CHECK-LABEL: @test_vtrn2_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn2_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
return vtrn2_p8(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn2q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
return vtrn2q_p8(a, b);
}
-// CHECK-LABEL: @test_vtrn2_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <4 x i16> @test_vtrn2_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
return vtrn2_p16(a, b);
}
-// CHECK-LABEL: @test_vtrn2q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <8 x i16> @test_vtrn2q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
return vtrn2q_p16(a, b);
}
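The vtrn1/vtrn2 shuffle masks checked above encode the TRN1/TRN2 lane interleave: vtrn1 takes the even-indexed lane pairs from the two inputs, vtrn2 the odd-indexed pairs. A minimal sketch of those semantics at the source level, assuming clang's brace initialization of NEON vector types (illustrative values only, not taken from the patch):

#include <arm_neon.h>

void trn_demo(void) {
  int16x4_t a = {0, 1, 2, 3};   // lanes a0..a3
  int16x4_t b = {4, 5, 6, 7};   // lanes b0..b3
  // mask <0,4,2,6>: {a0, b0, a2, b2} == {0, 4, 2, 6}
  int16x4_t t1 = vtrn1_s16(a, b);
  // mask <1,5,3,7>: {a1, b1, a3, b3} == {1, 5, 3, 7}
  int16x4_t t2 = vtrn2_s16(a, b);
  (void)t1;
  (void)t2;
}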
-// CHECK-LABEL: @test_vuzp_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vuzp_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
return vuzp_s8(a, b);
}
-// CHECK-LABEL: @test_vuzp_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vuzp_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
return vuzp_s16(a, b);
}
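The vuzp checks above pair two shuffles: mask <0,2,4,6> gathers the even lanes into val[0] of the returned struct, and <1,3,5,7> gathers the odd lanes into val[1]; the new <4 x i16> -> <8 x i8> -> <4 x i16> bitcast round-trip feeding them is value-preserving. A minimal usage sketch consistent with those masks (illustrative values only):

#include <arm_neon.h>

void uzp_demo(void) {
  int16x4_t a = {0, 1, 2, 3};
  int16x4_t b = {4, 5, 6, 7};
  int16x4x2_t r = vuzp_s16(a, b);
  // r.val[0] == {0, 2, 4, 6}   (mask <0,2,4,6>)
  // r.val[1] == {1, 3, 5, 7}   (mask <1,3,5,7>)
  (void)r;
}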
-// CHECK-LABEL: @test_vuzp_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vuzp_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
return vuzp_s32(a, b);
}
-// CHECK-LABEL: @test_vuzp_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vuzp_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
return vuzp_u8(a, b);
}
-// CHECK-LABEL: @test_vuzp_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vuzp_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
return vuzp_u16(a, b);
}
-// CHECK-LABEL: @test_vuzp_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vuzp_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
return vuzp_u32(a, b);
}
-// CHECK-LABEL: @test_vuzp_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vuzp_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT3]], <2 x float> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] poison, <2 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
return vuzp_f32(a, b);
}
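For the float variant the regenerated checks add one more hop, <2 x float> -> <2 x i32> -> <8 x i8> and back, before the shuffles; each step only reinterprets the same 64 bits. The source-level analogue of that round-trip is a no-op, sketched here with the standard vreinterpret intrinsics (illustrative only):

#include <arm_neon.h>

float32x2_t reinterpret_roundtrip(float32x2_t v) {
  // same 64 bits, viewed as two i32 lanes
  int32x2_t as_int = vreinterpret_s32_f32(v);
  // back to two float lanes, bit pattern unchanged
  return vreinterpret_f32_s32(as_int);
}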
-// CHECK-LABEL: @test_vuzp_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vuzp_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
return vuzp_p8(a, b);
}
-// CHECK-LABEL: @test_vuzp_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vuzp_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
return vuzp_p16(a, b);
}
-// CHECK-LABEL: @test_vuzpq_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vuzpq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
return vuzpq_s8(a, b);
}
-// CHECK-LABEL: @test_vuzpq_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vuzpq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
return vuzpq_s16(a, b);
}
-// CHECK-LABEL: @test_vuzpq_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vuzpq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
return vuzpq_s32(a, b);
}
-// CHECK-LABEL: @test_vuzpq_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vuzpq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
return vuzpq_u8(a, b);
}
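// The aggregate shape is unchanged by this patch: uint8x16x2_t (like the
// other x2_t types here) still wraps a [2 x <16 x i8>] array reached from C
// as r.val[0] / r.val[1]; only the IR that builds it differs, using
// insertvalue/extractvalue on the aggregate instead of the RETVAL alloca
// spills that the old checks matched.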
-// CHECK-LABEL: @test_vuzpq_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vuzpq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
return vuzpq_u16(a, b);
}
-// CHECK-LABEL: @test_vuzpq_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vuzpq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
return vuzpq_u32(a, b);
}
-// CHECK-LABEL: @test_vuzpq_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x float> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x float> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vuzpq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x float> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] poison, <4 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
return vuzpq_f32(a, b);
}
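// Note the extra hop in the float variant's checks: <4 x float> is first
// reinterpreted as <4 x i32>, then as <16 x i8>, then cast back to
// <4 x float>. Each step is a value-preserving bitcast, so the shuffles still
// see the original lanes; the integer detour appears to come from the
// element-type cast being routed through a same-width integer vector.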
-// CHECK-LABEL: @test_vuzpq_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vuzpq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
return vuzpq_p8(a, b);
}
-// CHECK-LABEL: @test_vuzpq_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vuzpq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
return vuzpq_p16(a, b);
}
-// CHECK-LABEL: @test_vzip_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vzip_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
return vzip_s8(a, b);
}
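// vzip interleaves corresponding lanes: the first result interleaves the low
// halves (lane 2k = a[k], lane 2k+1 = b[k], for k < 4) and the second
// interleaves the high halves, matching the masks above. A minimal sketch
// (illustrative only), assuming <arm_neon.h> and Clang's vector literals:
//
//   int8x8_t a = {0, 1, 2, 3, 4, 5, 6, 7};
//   int8x8_t b = {10, 11, 12, 13, 14, 15, 16, 17};
//   int8x8x2_t r = vzip_s8(a, b);
//   // r.val[0] == {0, 10, 1, 11, 2, 12, 3, 13}
//   // r.val[1] == {4, 14, 5, 15, 6, 16, 7, 17}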
-// CHECK-LABEL: @test_vzip_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vzip_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
return vzip_s16(a, b);
}
-// CHECK-LABEL: @test_vzip_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vzip_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
return vzip_s32(a, b);
}
-// CHECK-LABEL: @test_vzip_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vzip_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
return vzip_u8(a, b);
}
-// CHECK-LABEL: @test_vzip_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vzip_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
return vzip_u16(a, b);
}
-// CHECK-LABEL: @test_vzip_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vzip_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
return vzip_u32(a, b);
}
-// CHECK-LABEL: @test_vzip_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vzip_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT3]], <2 x float> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] poison, <2 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
return vzip_f32(a, b);
}
-// CHECK-LABEL: @test_vzip_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vzip_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
return vzip_p8(a, b);
}
-// CHECK-LABEL: @test_vzip_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vzip_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
return vzip_p16(a, b);
}
-// CHECK-LABEL: @test_vzipq_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vzipq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
return vzipq_s8(a, b);
}
-// CHECK-LABEL: @test_vzipq_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vzipq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
return vzipq_s16(a, b);
}
-// CHECK-LABEL: @test_vzipq_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vzipq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
return vzipq_s32(a, b);
}
-// CHECK-LABEL: @test_vzipq_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vzipq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
return vzipq_u8(a, b);
}
-// CHECK-LABEL: @test_vzipq_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vzipq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
return vzipq_u16(a, b);
}
-// CHECK-LABEL: @test_vzipq_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vzipq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
return vzipq_u32(a, b);
}
-// CHECK-LABEL: @test_vzipq_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x float> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x float> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vzipq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x float> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] poison, <4 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
return vzipq_f32(a, b);
}
-// CHECK-LABEL: @test_vzipq_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vzipq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
return vzipq_p8(a, b);
}
-// CHECK-LABEL: @test_vzipq_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vzipq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
return vzipq_p16(a, b);
}
-// CHECK-LABEL: @test_vtrn_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vtrn_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
return vtrn_s8(a, b);
}
-// CHECK-LABEL: @test_vtrn_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vtrn_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
return vtrn_s16(a, b);
}
-// CHECK-LABEL: @test_vtrn_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vtrn_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
return vtrn_s32(a, b);
}
-// CHECK-LABEL: @test_vtrn_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vtrn_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
return vtrn_u8(a, b);
}
-// CHECK-LABEL: @test_vtrn_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vtrn_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
return vtrn_u16(a, b);
}
-// CHECK-LABEL: @test_vtrn_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vtrn_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
return vtrn_u32(a, b);
}
-// CHECK-LABEL: @test_vtrn_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
-// CHECK: store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vtrn_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT3]], <2 x float> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] poison, <2 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
return vtrn_f32(a, b);
}
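// NOTE (reviewer sketch): for floating-point element types the regenerated
// checks show one extra hop -- the vector is first bitcast to an integer
// vector of the same width, then to the byte vector, then back to the float
// type before the shuffle (see the f32 cases above). All three casts are
// bitcasts of the same 64- or 128-bit value, so the transpose/zip semantics
// are unchanged.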
-// CHECK-LABEL: @test_vtrn_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
-// CHECK: store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vtrn_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
return vtrn_p8(a, b);
}
-// CHECK-LABEL: @test_vtrn_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vtrn_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
return vtrn_p16(a, b);
}
-// CHECK-LABEL: @test_vtrnq_s8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vtrnq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
return vtrnq_s8(a, b);
}
-// CHECK-LABEL: @test_vtrnq_s16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vtrnq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
return vtrnq_s16(a, b);
}
-// CHECK-LABEL: @test_vtrnq_s32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vtrnq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
return vtrnq_s32(a, b);
}
-// CHECK-LABEL: @test_vtrnq_u8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vtrnq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
return vtrnq_u8(a, b);
}
-// CHECK-LABEL: @test_vtrnq_u16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vtrnq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
return vtrnq_u16(a, b);
}
-// CHECK-LABEL: @test_vtrnq_u32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vtrnq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
return vtrnq_u32(a, b);
}
-// CHECK-LABEL: @test_vtrnq_f32(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x float> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x float> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
-// CHECK: store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vtrnq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x float> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] poison, <4 x float> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
+//
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
return vtrnq_f32(a, b);
}
-// CHECK-LABEL: @test_vtrnq_p8(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
-// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
-// CHECK: store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
-// CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly8x16x2_t [[TMP8]]
+// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vtrnq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
return vtrnq_p8(a, b);
}
-// CHECK-LABEL: @test_vtrnq_p16(
-// CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
-// CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
-// CHECK: [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
-// CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
-// CHECK: store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
-// CHECK: [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x2_t [[TMP10]]
+// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vtrnq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
return vtrnq_p16(a, b);
}
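
The regenerated vtrnq checks above capture the bitcast-based lowering this patch now emits from arm_neon.h: each <8 x i16> argument is bitcast to <16 x i8> and straight back before the trn shuffles. A minimal sketch of that round-trip, distilled from the test_vtrnq_p16 checks (value names are illustrative, not the exact FileCheck captures):

  %ac = bitcast <8 x i16> %a to <16 x i8>
  %bc = bitcast <8 x i16> %b to <16 x i8>
  %a2 = bitcast <16 x i8> %ac to <8 x i16>   ; no-op round-trip, trivially foldable later
  %b2 = bitcast <16 x i8> %bc to <8 x i16>
  %lo = shufflevector <8 x i16> %a2, <8 x i16> %b2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %hi = shufflevector <8 x i16> %a2, <8 x i16> %b2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
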
diff --git a/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem-constrained.c b/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem-constrained.c
index 1d0db697e4fdd..d56dc193d7f1e 100644
--- a/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem-constrained.c
+++ b/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem-constrained.c
@@ -1,17 +1,11 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -ffp-exception-behavior=strict \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
-// RUN: -ffp-exception-behavior=strict \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefix=CONSTRAINED %s
// REQUIRES: aarch64-registered-target
@@ -19,112 +13,248 @@
#include <arm_neon.h>
-// COMMON-LABEL: test_vfmas_lane_f32
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
-// UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
-// CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float %b, float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
-// COMMONIR: ret float [[TMP2]]
+// UNCONSTRAINED-LABEL: define dso_local float @test_vfmas_lane_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[B]], float [[EXTRACT]], float [[A]])
+// UNCONSTRAINED-NEXT: ret float [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local float @test_vfmas_lane_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[B]], float [[EXTRACT]], float [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2:[0-9]+]]
+// CONSTRAINED-NEXT: ret float [[TMP0]]
+//
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmas_lane_f32(a, b, c, 1);
}
-// COMMON-LABEL: test_vfmad_lane_f64
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
-// UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
-// CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: ret double [[TMP2]]
+// UNCONSTRAINED-LABEL: define dso_local double @test_vfmad_lane_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[C]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B]], double [[EXTRACT]], double [[A]])
+// UNCONSTRAINED-NEXT: ret double [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local double @test_vfmad_lane_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[C]], i32 0
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[B]], double [[EXTRACT]], double [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret double [[TMP0]]
+//
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
return vfmad_lane_f64(a, b, c, 0);
}
-// COMMON-LABEL: test_vfmad_laneq_f64
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
-// UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
-// CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla d{{[0-9]+}}, d{{[0-9]+}}, v{{[0-9]+}}.d[{{[0-9]+}}]
-// COMMONIR: ret double [[TMP2]]
+// UNCONSTRAINED-LABEL: define dso_local double @test_vfmad_laneq_f64(
+// UNCONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <2 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B]], double [[EXTRACT]], double [[A]])
+// UNCONSTRAINED-NEXT: ret double [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local double @test_vfmad_laneq_f64(
+// CONSTRAINED-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <2 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[C]], i32 1
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[B]], double [[EXTRACT]], double [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret double [[TMP0]]
+//
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
return vfmad_laneq_f64(a, b, c, 1);
}
-// COMMON-LABEL: test_vfmss_lane_f32
-// COMMONIR: [[SUB:%.*]] = fneg float %b
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
-// UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
-// CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[SUB]], float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
-// COMMONIR: ret float [[TMP2]]
+// UNCONSTRAINED-LABEL: define dso_local float @test_vfmss_lane_f32(
+// UNCONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[B]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[FNEG]], float [[EXTRACT]], float [[A]])
+// UNCONSTRAINED-NEXT: ret float [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local float @test_vfmss_lane_f32(
+// CONSTRAINED-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[B]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[FNEG]], float [[EXTRACT]], float [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret float [[TMP0]]
+//
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmss_lane_f32(a, b, c, 1);
}
-// COMMON-LABEL: test_vfma_lane_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
-// CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: ret <1 x double> [[FMLA2]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// UNCONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// UNCONSTRAINED-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CONSTRAINED-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <1 x double> [[FMLA2]]
+//
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfma_lane_f64(a, b, v, 0);
}
-// COMMON-LABEL: test_vfms_lane_f64
-// COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
-// CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: ret <1 x double> [[FMLA2]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// UNCONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// UNCONSTRAINED-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <1 x double> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CONSTRAINED-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <1 x double> [[FMLA2]]
+//
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, 0);
}
-// COMMON-LABEL: test_vfma_laneq_f64
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// COMMONIR: ret <1 x double> [[TMP7]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// UNCONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// UNCONSTRAINED-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// UNCONSTRAINED-NEXT: ret <1 x double> [[TMP10]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CONSTRAINED-NEXT: ret <1 x double> [[TMP10]]
+//
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, 0);
}
-// COMMON-LABEL: test_vfms_laneq_f64
-// COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
-// CHECK-ASM: fneg d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// COMMONIR: ret <1 x double> [[TMP7]]
+// UNCONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
+// UNCONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// UNCONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// UNCONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// UNCONSTRAINED-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// UNCONSTRAINED-NEXT: ret <1 x double> [[TMP10]]
+//
+// CONSTRAINED-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
+// CONSTRAINED-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CONSTRAINED-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CONSTRAINED-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CONSTRAINED-NEXT: ret <1 x double> [[TMP10]]
+//
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfms_laneq_f64(a, b, v, 0);
}
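
The UNCONSTRAINED and CONSTRAINED prefixes above differ mainly in the FMA intrinsic being checked: under -ffp-exception-behavior=strict the same lane extraction feeds the constrained variant, which carries the rounding mode and exception behavior as metadata operands (plus the strictfp call attribute). A one-line sketch distilled from the checks (operand names are placeholders):

  %r = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
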
diff --git a/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c
index 8b7b976ab5e5a..9b98126500444 100644
--- a/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c
+++ b/clang/test/CodeGen/AArch64/neon-scalar-x-indexed-elem.c
@@ -1,419 +1,565 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: define{{.*}} float @test_vmuls_lane_f32(float noundef %a, <2 x float> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
-// CHECK: [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
-// CHECK: ret float [[MUL]]
+// CHECK-LABEL: define dso_local float @test_vmuls_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x float> [[B]], i32 1
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[A]], [[VGET_LANE]]
+// CHECK-NEXT: ret float [[MUL]]
+//
float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
return vmuls_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} double @test_vmuld_lane_f64(double noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
-// CHECK: [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
-// CHECK: ret double [[MUL]]
+// CHECK-LABEL: define dso_local double @test_vmuld_lane_f64(
+// CHECK-SAME: double noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[B]], i32 0
+// CHECK-NEXT: [[MUL:%.*]] = fmul double [[A]], [[VGET_LANE]]
+// CHECK-NEXT: ret double [[MUL]]
+//
float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
return vmuld_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} float @test_vmuls_laneq_f32(float noundef %a, <4 x float> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
-// CHECK: [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
-// CHECK: ret float [[MUL]]
+// CHECK-LABEL: define dso_local float @test_vmuls_laneq_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[B]], i32 3
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[A]], [[VGETQ_LANE]]
+// CHECK-NEXT: ret float [[MUL]]
+//
float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
return vmuls_laneq_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} double @test_vmuld_laneq_f64(double noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
-// CHECK: [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
-// CHECK: ret double [[MUL]]
+// CHECK-LABEL: define dso_local double @test_vmuld_laneq_f64(
+// CHECK-SAME: double noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B]], i32 1
+// CHECK-NEXT: [[MUL:%.*]] = fmul double [[A]], [[VGETQ_LANE]]
+// CHECK-NEXT: ret double [[MUL]]
+//
float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
return vmuld_laneq_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmul_n_f64(<1 x double> noundef %a, double noundef %b) #0 {
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %a to double
-// CHECK: [[TMP3:%.*]] = fmul double [[TMP2]], %b
-// CHECK: [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP4]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmul_n_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = fmul double [[TMP0]], [[B]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP2]]
+//
float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
return vmul_n_f64(a, b);
}
-// CHECK-LABEL: define{{.*}} float @test_vmulxs_lane_f32(float noundef %a, <2 x float> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
-// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
-// CHECK: ret float [[VMULXS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vmulxs_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x float> [[B]], i32 1
+// CHECK-NEXT: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float [[A]], float [[VGET_LANE]])
+// CHECK-NEXT: ret float [[VMULXS_F32_I]]
+//
float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
return vmulxs_lane_f32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} float @test_vmulxs_laneq_f32(float noundef %a, <4 x float> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
-// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
-// CHECK: ret float [[VMULXS_F32_I]]
+// CHECK-LABEL: define dso_local float @test_vmulxs_laneq_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[B]], i32 3
+// CHECK-NEXT: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float [[A]], float [[VGETQ_LANE]])
+// CHECK-NEXT: ret float [[VMULXS_F32_I]]
+//
float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
return vmulxs_laneq_f32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} double @test_vmulxd_lane_f64(double noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
-// CHECK: ret double [[VMULXD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vmulxd_lane_f64(
+// CHECK-SAME: double noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[B]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[A]], double [[VGET_LANE]])
+// CHECK-NEXT: ret double [[VMULXD_F64_I]]
+//
float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
return vmulxd_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} double @test_vmulxd_laneq_f64(double noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
-// CHECK: ret double [[VMULXD_F64_I]]
+// CHECK-LABEL: define dso_local double @test_vmulxd_laneq_f64(
+// CHECK-SAME: double noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B]], i32 1
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[A]], double [[VGETQ_LANE]])
+// CHECK-NEXT: ret double [[VMULXD_F64_I]]
+//
float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
return vmulxd_laneq_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
-// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
-// CHECK: ret <1 x double> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_lane_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x double> [[B]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE4]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
+//
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
return vmulx_lane_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_0(<1 x double> noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
-// CHECK: ret <1 x double> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_0(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
+//
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_1(<1 x double> noundef %a, <2 x double> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
-// CHECK: ret <1 x double> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_1(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[A]], i32 0
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[B]], i32 1
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[A]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
+//
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} float @test_vfmas_lane_f32(float noundef %a, float noundef %b, <2 x float> noundef %c) #0 {
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
-// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
-// CHECK: ret float [[TMP2]]
+// CHECK-LABEL: define dso_local float @test_vfmas_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[B]], float [[EXTRACT]], float [[A]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmas_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: define{{.*}} double @test_vfmad_lane_f64(double noundef %a, double noundef %b, <1 x double> noundef %c) #0 {
-// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
-// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
-// CHECK: ret double [[TMP2]]
+// CHECK-LABEL: define dso_local double @test_vfmad_lane_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <1 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[C]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B]], double [[EXTRACT]], double [[A]])
+// CHECK-NEXT: ret double [[TMP0]]
+//
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
return vfmad_lane_f64(a, b, c, 0);
}
-// CHECK-LABEL: define{{.*}} double @test_vfmad_laneq_f64(double noundef %a, double noundef %b, <2 x double> noundef %c) #0 {
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
-// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
-// CHECK: ret double [[TMP2]]
+// CHECK-LABEL: define dso_local double @test_vfmad_laneq_f64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]], <2 x double> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[B]], double [[EXTRACT]], double [[A]])
+// CHECK-NEXT: ret double [[TMP0]]
+//
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
return vfmad_laneq_f64(a, b, c, 1);
}
-// CHECK-LABEL: define{{.*}} float @test_vfmss_lane_f32(float noundef %a, float noundef %b, <2 x float> noundef %c) #0 {
-// CHECK: [[SUB:%.*]] = fneg float %b
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
-// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
-// CHECK: ret float [[TMP2]]
+// CHECK-LABEL: define dso_local float @test_vfmss_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG:%.*]] = fneg float [[B]]
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[FNEG]], float [[EXTRACT]], float [[A]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmss_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b, <1 x double> noundef %v) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
-// CHECK: ret <1 x double> [[FMLA2]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfma_lane_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
+// CHECK-NEXT: ret <1 x double> [[FMLA2]]
+//
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfma_lane_f64(a, b, v, 0);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_lane_f64(<1 x double> noundef %a, <1 x double> noundef %b, <1 x double> noundef %v) #0 {
-// CHECK: [[SUB:%.*]] = fneg <1 x double> %b
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
-// CHECK: ret <1 x double> [[FMLA2]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfms_lane_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__S2_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[__S2_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP6]], <1 x double> [[TMP6]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x double>
+// CHECK-NEXT: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
+// CHECK-NEXT: ret <1 x double> [[FMLA2]]
+//
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, 0);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vfma_laneq_f64(<1 x double> noundef %a, <1 x double> noundef %b, <2 x double> noundef %v) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP7]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfma_laneq_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP10]]
+//
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, 0);
}
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vfms_laneq_f64(<1 x double> noundef %a, <1 x double> noundef %b, <2 x double> noundef %v) #0 {
-// CHECK: [[SUB:%.*]] = fneg <1 x double> %b
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
-// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
-// CHECK: ret <1 x double> [[TMP7]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vfms_laneq_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]], <2 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[FNEG:%.*]] = fneg <1 x double> [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG]] to i64
+// CHECK-NEXT: [[__S1_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[__S1_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to double
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to double
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+// CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[EXTRACT]], double [[TMP6]])
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast double [[TMP9]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP10]]
+//
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfms_laneq_f64(a, b, v, 0);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
-// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
-// CHECK: ret i32 [[TMP4]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmullh_lane_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
+// CHECK-NEXT: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
return vqdmullh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
-// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
-// CHECK: ret i64 [[VQDMULLS_S32_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmulls_lane_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B]], i32 1
+// CHECK-NEXT: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A]], i32 [[VGET_LANE]])
+// CHECK-NEXT: ret i64 [[VQDMULLS_S32_I]]
+//
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
return vqdmulls_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmullh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
-// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
-// CHECK: ret i32 [[TMP4]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmullh_laneq_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
return vqdmullh_laneq_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmulls_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
-// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
-// CHECK: ret i64 [[VQDMULLS_S32_I]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmulls_laneq_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B]], i32 3
+// CHECK-NEXT: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A]], i32 [[VGETQ_LANE]])
+// CHECK-NEXT: ret i64 [[VQDMULLS_S32_I]]
+//
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
return vqdmulls_laneq_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
-// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP4]]
+// CHECK-LABEL: define dso_local i16 @test_vqdmulhh_lane_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
+// CHECK-NEXT: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
return vqdmulhh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
-// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
-// CHECK: ret i32 [[VQDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmulhs_lane_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B]], i32 1
+// CHECK-NEXT: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A]], i32 [[VGET_LANE]])
+// CHECK-NEXT: ret i32 [[VQDMULHS_S32_I]]
+//
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
return vqdmulhs_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} i16 @test_vqdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
-// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP4]]
+// CHECK-LABEL: define dso_local i16 @test_vqdmulhh_laneq_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
return vqdmulhh_laneq_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
-// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
-// CHECK: ret i32 [[VQDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmulhs_laneq_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B]], i32 3
+// CHECK-NEXT: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A]], i32 [[VGETQ_LANE]])
+// CHECK-NEXT: ret i32 [[VQDMULHS_S32_I]]
+//
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
return vqdmulhs_laneq_s32(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_lane_s16(i16 noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
-// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP4]]
+// CHECK-LABEL: define dso_local i16 @test_vqrdmulhh_lane_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0
+// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
return vqrdmulhh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_lane_s32(i32 noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
-// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
-// CHECK: ret i32 [[VQRDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqrdmulhs_lane_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[B]], i32 1
+// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A]], i32 [[VGET_LANE]])
+// CHECK-NEXT: ret i32 [[VQRDMULHS_S32_I]]
+//
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
return vqrdmulhs_lane_s32(a, b, 1);
}
-// CHECK-LABEL: define{{.*}} i16 @test_vqrdmulhh_laneq_s16(i16 noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
-// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
-// CHECK: ret i16 [[TMP4]]
+// CHECK-LABEL: define dso_local i16 @test_vqrdmulhh_laneq_s16(
+// CHECK-SAME: i16 noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[B]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0
+// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
+// CHECK-NEXT: ret i16 [[TMP2]]
+//
int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
return vqrdmulhh_laneq_s16(a, b, 7);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqrdmulhs_laneq_s32(i32 noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
-// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
-// CHECK: ret i32 [[VQRDMULHS_S32_I]]
+// CHECK-LABEL: define dso_local i32 @test_vqrdmulhs_laneq_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[B]], i32 3
+// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A]], i32 [[VGETQ_LANE]])
+// CHECK-NEXT: ret i32 [[VQRDMULHS_S32_I]]
+//
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
return vqrdmulhs_laneq_s32(a, b, 3);
}
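The `sqdmulh`/`sqrdmulh` checks above encode a (rounding,) doubling, saturating high-half multiply. A hedged scalar model of the rounding i32 form, under my reading of the intrinsic (the helper name is made up):

  #include <stdint.h>

  int32_t vqrdmulhs_ref(int32_t a, int32_t b) {
    /* (2*a*b + 2^31) >> 32, folded into one shift so the i64 never overflows */
    int64_t p = (((int64_t)a * b) + (1LL << 30)) >> 31;
    if (p > INT32_MAX) p = INT32_MAX; /* only INT32_MIN * INT32_MIN saturates */
    return (int32_t)p;
  }

The non-rounding `vqdmulh*` tests earlier in the file are the same computation without the rounding constant.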
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_lane_s16(i32 noundef %a, i16 noundef %b, <4 x i16> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
-// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
-// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
-// CHECK: ret i32 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlalh_lane_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i16> [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[LANE0]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1]]
+//
int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
return vqdmlalh_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_lane_s32(i64 noundef %a, i32 noundef %b, <2 x i32> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
-// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
-// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
-// CHECK: ret i64 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlals_lane_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <2 x i32> [[C]], i32 1
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[LANE]])
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A]], i64 [[VQDMLXL]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1]]
+//
int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlals_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmlalh_laneq_s16(i32 noundef %a, i16 noundef %b, <8 x i16> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
-// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
-// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
-// CHECK: ret i32 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlalh_laneq_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <8 x i16> [[C]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[LANE0]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1]]
+//
int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlalh_laneq_s16(a, b, c, 7);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmlals_laneq_s32(i64 noundef %a, i32 noundef %b, <4 x i32> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
-// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
-// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
-// CHECK: ret i64 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlals_laneq_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i32> [[C]], i32 3
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[LANE]])
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A]], i64 [[VQDMLXL]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1]]
+//
int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
return vqdmlals_laneq_s32(a, b, c, 3);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_lane_s16(i32 noundef %a, i16 noundef %b, <4 x i16> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
-// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
-// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
-// CHECK: ret i32 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlslh_lane_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i16> [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[LANE0]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1]]
+//
int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
return vqdmlslh_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_lane_s32(i64 noundef %a, i32 noundef %b, <2 x i32> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
-// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
-// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
-// CHECK: ret i64 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlsls_lane_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <2 x i32> [[C]], i32 1
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[LANE]])
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A]], i64 [[VQDMLXL]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1]]
+//
int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlsls_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: define{{.*}} i32 @test_vqdmlslh_laneq_s16(i32 noundef %a, i16 noundef %b, <8 x i16> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
-// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
-// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
-// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
-// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
-// CHECK: ret i32 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i32 @test_vqdmlslh_laneq_s16(
+// CHECK-SAME: i32 noundef [[A:%.*]], i16 noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <8 x i16> [[C]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LANE]], i64 0
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-NEXT: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[LANE0]])
+// CHECK-NEXT: ret i32 [[VQDMLXL1]]
+//
int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlslh_laneq_s16(a, b, c, 7);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vqdmlsls_laneq_s32(i64 noundef %a, i32 noundef %b, <4 x i32> noundef %c) #0 {
-// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
-// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
-// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
-// CHECK: ret i64 [[VQDMLXL1]]
+// CHECK-LABEL: define dso_local i64 @test_vqdmlsls_laneq_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = extractelement <4 x i32> [[C]], i32 3
+// CHECK-NEXT: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[LANE]])
+// CHECK-NEXT: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A]], i64 [[VQDMLXL]])
+// CHECK-NEXT: ret i64 [[VQDMLXL1]]
+//
int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
return vqdmlsls_laneq_s32(a, b, c, 3);
}
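The `vqdmlal*`/`vqdmlsl*` scalar tests pair a saturating doubling multiply with a saturating accumulate, which is why the IR above chains `sqdmull` into `sqadd` or `sqsub`. A plain-C sketch of the h-form, assuming the usual two-step saturation (illustrative helper name, not part of the patch):

  #include <stdint.h>

  int32_t vqdmlalh_ref(int32_t acc, int16_t b, int16_t lane) {
    int64_t d = 2 * (int64_t)b * (int64_t)lane; /* sqdmull step              */
    if (d > INT32_MAX) d = INT32_MAX;  /* only INT16_MIN * INT16_MIN saturates */
    int64_t sum = (int64_t)acc + d;    /* sqadd step                          */
    if (sum > INT32_MAX) sum = INT32_MAX;
    if (sum < INT32_MIN) sum = INT32_MIN;
    return (int32_t)sum;
  }

The `vqdmlsl*` variants only swap the final `sqadd` for `sqsub`.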
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_lane_f64_0() #0 {
-// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
-// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
-// CHECK: ret <1 x double> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_lane_f64_0(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE9:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE9]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
+//
float64x1_t test_vmulx_lane_f64_0() {
float64x1_t arg1;
float64x1_t arg2;
@@ -425,15 +571,18 @@ float64x1_t test_vmulx_lane_f64_0() {
return result;
}
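These vmulx tests pin down `@llvm.aarch64.neon.fmulx.f64`. As I understand FMULX, it multiplies like an ordinary fmul except that zero times infinity yields 2.0 with the product's sign instead of NaN; a hedged scalar model (a sketch, not authoritative):

  #include <math.h>

  double fmulx_ref(double a, double b) {
    if ((a == 0.0 && isinf(b)) || (isinf(a) && b == 0.0))
      return 2.0 * copysign(1.0, a) * copysign(1.0, b); /* signed 2.0, not NaN */
    return a * b; /* ordinary IEEE multiply otherwise */
  }

That special case is what keeps reciprocal-estimate Newton-Raphson sequences NaN-free, which is the usual motivation for the instruction.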
-// CHECK-LABEL: define{{.*}} <1 x double> @test_vmulx_laneq_f64_2() #0 {
-// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
-// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
-// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
-// CHECK: ret <1 x double> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vmulx_laneq_f64_2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
+// CHECK-NEXT: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
+// CHECK-NEXT: ret <1 x double> [[VSET_LANE]]
+//
float64x1_t test_vmulx_laneq_f64_2() {
float64x1_t arg1;
float64x1_t arg2;
diff --git a/clang/test/CodeGen/AArch64/neon-vcmla.c b/clang/test/CodeGen/AArch64/neon-vcmla.c
index d860411fe45c5..72dad95939ecd 100644
--- a/clang/test/CodeGen/AArch64/neon-vcmla.c
+++ b/clang/test/CodeGen/AArch64/neon-vcmla.c
@@ -2,7 +2,7 @@
// RUN: %clang_cc1 -triple arm64 -target-feature +neon \
// RUN: -target-feature +v8.3a \
// RUN: -target-feature +fullfp16 \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes="mem2reg,instsimplify" | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes="mem2reg,instsimplify,sroa" | FileCheck %s
// REQUIRES: aarch64-registered-target
@@ -11,8 +11,20 @@
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[RHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F16_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F161_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F162_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[VCMLA_F16_I]], <4 x half> [[VCMLA_F161_I]], <4 x half> [[VCMLA_F162_I]])
+// CHECK-NEXT: [[VCMLA_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP7]]
//
float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_f16(acc, lhs, rhs);
@@ -21,8 +33,20 @@ float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F32_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F321_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F322_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[VCMLA_F32_I]], <2 x float> [[VCMLA_F321_I]], <2 x float> [[VCMLA_F322_I]])
+// CHECK-NEXT: [[VCMLA_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP7]]
//
float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_f32(acc, lhs, rhs);
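As context for the rot0/rot90 pairs this file exercises: a complete complex multiply-accumulate is conventionally composed from the two rotations. A usage sketch (mine, assuming the standard ACLE pairing; `complex_fma` is an illustrative name, and the target needs the v8.3a complex-number extension):

  #include <arm_neon.h>

  /* One float32x2_t holds a single complex value as (re, im). */
  float32x2_t complex_fma(float32x2_t acc, float32x2_t a, float32x2_t b) {
    acc = vcmla_f32(acc, a, b);       /* partial terms from the real parts of a */
    acc = vcmla_rot90_f32(acc, a, b); /* partial terms from the imaginary parts */
    return acc;
  }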
@@ -31,8 +55,20 @@ float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[RHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F16_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F161_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F162_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[VCMLAQ_F16_I]], <8 x half> [[VCMLAQ_F161_I]], <8 x half> [[VCMLAQ_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP7]]
//
float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_f16(acc, lhs, rhs);
@@ -41,8 +77,20 @@ float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[VCMLAQ_F32_I]], <4 x float> [[VCMLAQ_F321_I]], <4 x float> [[VCMLAQ_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_f32(acc, lhs, rhs);
@@ -51,8 +99,20 @@ float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT: ret <2 x double> [[VCMLAQ_F643_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[ACC]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[LHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F64_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_F641_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_F642_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[VCMLAQ_F64_I]], <2 x double> [[VCMLAQ_F641_I]], <2 x double> [[VCMLAQ_F642_I]])
+// CHECK-NEXT: [[VCMLAQ_F644_I:%.*]] = bitcast <2 x double> [[VCMLAQ_F643_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_F644_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP7]]
//
float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_f64(acc, lhs, rhs);
@@ -61,8 +121,20 @@ float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[RHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F16_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F161_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F162_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[VCMLA_ROT90_F16_I]], <4 x half> [[VCMLA_ROT90_F161_I]], <4 x half> [[VCMLA_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT90_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP7]]
//
float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot90_f16(acc, lhs, rhs);
@@ -71,8 +143,20 @@ float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t r
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F32_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F321_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F322_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[VCMLA_ROT90_F32_I]], <2 x float> [[VCMLA_ROT90_F321_I]], <2 x float> [[VCMLA_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT90_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP7]]
//
float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot90_f32(acc, lhs, rhs);
@@ -81,8 +165,20 @@ float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t r
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[RHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F16_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F161_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F162_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[VCMLAQ_ROT90_F16_I]], <8 x half> [[VCMLAQ_ROT90_F161_I]], <8 x half> [[VCMLAQ_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT90_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP7]]
//
float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot90_f16(acc, lhs, rhs);
@@ -91,8 +187,20 @@ float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[VCMLAQ_ROT90_F32_I]], <4 x float> [[VCMLAQ_ROT90_F321_I]], <4 x float> [[VCMLAQ_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT90_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot90_f32(acc, lhs, rhs);
@@ -101,8 +209,20 @@ float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t
// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT90_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[ACC]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[LHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F64_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F641_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F642_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[VCMLAQ_ROT90_F64_I]], <2 x double> [[VCMLAQ_ROT90_F641_I]], <2 x double> [[VCMLAQ_ROT90_F642_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F644_I:%.*]] = bitcast <2 x double> [[VCMLAQ_ROT90_F643_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F644_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP7]]
//
float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot90_f64(acc, lhs, rhs);
@@ -111,8 +231,20 @@ float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[RHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F16_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F161_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F162_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[VCMLA_ROT180_F16_I]], <4 x half> [[VCMLA_ROT180_F161_I]], <4 x half> [[VCMLA_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT180_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP7]]
//
float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot180_f16(acc, lhs, rhs);
@@ -121,8 +253,20 @@ float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F32_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F321_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F322_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[VCMLA_ROT180_F32_I]], <2 x float> [[VCMLA_ROT180_F321_I]], <2 x float> [[VCMLA_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT180_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP7]]
//
float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot180_f32(acc, lhs, rhs);
@@ -131,8 +275,20 @@ float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[RHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F16_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F161_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F162_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[VCMLAQ_ROT180_F16_I]], <8 x half> [[VCMLAQ_ROT180_F161_I]], <8 x half> [[VCMLAQ_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT180_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP7]]
//
float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot180_f16(acc, lhs, rhs);
@@ -141,8 +297,20 @@ float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[VCMLAQ_ROT180_F32_I]], <4 x float> [[VCMLAQ_ROT180_F321_I]], <4 x float> [[VCMLAQ_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT180_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot180_f32(acc, lhs, rhs);
@@ -151,8 +319,20 @@ float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t
// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT180_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[ACC]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[LHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F64_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F641_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F642_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[VCMLAQ_ROT180_F64_I]], <2 x double> [[VCMLAQ_ROT180_F641_I]], <2 x double> [[VCMLAQ_ROT180_F642_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F644_I:%.*]] = bitcast <2 x double> [[VCMLAQ_ROT180_F643_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F644_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP7]]
//
float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot180_f64(acc, lhs, rhs);
@@ -161,8 +341,20 @@ float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[RHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F16_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F161_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F162_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[VCMLA_ROT270_F16_I]], <4 x half> [[VCMLA_ROT270_F161_I]], <4 x half> [[VCMLA_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT270_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP7]]
//
float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot270_f16(acc, lhs, rhs);
@@ -171,8 +363,20 @@ float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F32_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F321_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F322_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[VCMLA_ROT270_F32_I]], <2 x float> [[VCMLA_ROT270_F321_I]], <2 x float> [[VCMLA_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT270_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP7]]
//
float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot270_f32(acc, lhs, rhs);
@@ -181,8 +385,20 @@ float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[RHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F16_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F161_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F162_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[VCMLAQ_ROT270_F16_I]], <8 x half> [[VCMLAQ_ROT270_F161_I]], <8 x half> [[VCMLAQ_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT270_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP7]]
//
float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot270_f16(acc, lhs, rhs);
@@ -191,8 +407,20 @@ float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[VCMLAQ_ROT270_F32_I]], <4 x float> [[VCMLAQ_ROT270_F321_I]], <4 x float> [[VCMLAQ_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT270_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot270_f32(acc, lhs, rhs);
@@ -201,8 +429,20 @@ float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t
// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VCMLAQ_ROT270_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
-// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[ACC]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[LHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F64_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F641_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F642_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[VCMLAQ_ROT270_F64_I]], <2 x double> [[VCMLAQ_ROT270_F641_I]], <2 x double> [[VCMLAQ_ROT270_F642_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F644_I:%.*]] = bitcast <2 x double> [[VCMLAQ_ROT270_F643_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F644_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP7]]
//
float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot270_f64(acc, lhs, rhs);
@@ -211,19 +451,26 @@ float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_150:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_150:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_150]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_150]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_150]], align 8
-// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[VCMLA_F16_I]], <4 x half> [[VCMLA_F161_I]], <4 x half> [[VCMLA_F162_I]])
+// CHECK-NEXT: [[VCMLA_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_lane_f16(acc, lhs, rhs, 1);
@@ -233,19 +480,26 @@ float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rh
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_154:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_154:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_154]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_154]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_154]], align 8
-// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[VCMLA_F16_I]], <4 x half> [[VCMLA_F161_I]], <4 x half> [[VCMLA_F162_I]])
+// CHECK-NEXT: [[VCMLA_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_laneq_f16(acc, lhs, rhs, 3);
@@ -254,25 +508,30 @@ float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t r
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_152:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_152:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_152]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_152]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_152]], align 16
-// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[VCMLAQ_F16_I]], <8 x half> [[VCMLAQ_F161_I]], <8 x half> [[VCMLAQ_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_lane_f16(acc, lhs, rhs, 1);
@@ -281,25 +540,30 @@ float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t r
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_156:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_156:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_156]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_156]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_156]], align 16
-// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[VCMLAQ_F16_I]], <8 x half> [[VCMLAQ_F161_I]], <8 x half> [[VCMLAQ_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
@@ -308,16 +572,25 @@ float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_182:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_182:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_182]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_182]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_182_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_182_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_182]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_182]], align 8
-// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[VCMLA_F32_I]], <2 x float> [[VCMLA_F321_I]], <2 x float> [[VCMLA_F322_I]])
+// CHECK-NEXT: [[VCMLA_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_lane_f32(acc, lhs, rhs, 0);
@@ -327,16 +600,24 @@ float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rh
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_186:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_186:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_186]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_186]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_186]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_186]], align 8
-// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[VCMLA_F32_I]], <2 x float> [[VCMLA_F321_I]], <2 x float> [[VCMLA_F322_I]])
+// CHECK-NEXT: [[VCMLA_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_laneq_f32(acc, lhs, rhs, 1);
@@ -345,19 +626,27 @@ float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t r
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_184:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_184:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_184]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_184_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_184_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_184]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_184]], align 16
-// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x i64> [[__S2_184_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[VCMLAQ_F32_I]], <4 x float> [[VCMLAQ_F321_I]], <4 x float> [[VCMLAQ_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_lane_f32(acc, lhs, rhs, 0);
@@ -366,19 +655,26 @@ float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t r
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_188:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_188:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_188]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_188]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_188]], align 16
-// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[VCMLAQ_F32_I]], <4 x float> [[VCMLAQ_F321_I]], <4 x float> [[VCMLAQ_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
@@ -387,19 +683,26 @@ float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_174:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_174:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_174]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_174]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_174]], align 8
-// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[VCMLA_ROT90_F16_I]], <4 x half> [[VCMLA_ROT90_F161_I]], <4 x half> [[VCMLA_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT90_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
@@ -409,19 +712,26 @@ float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_178:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_178:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_178]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_178]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_178]], align 8
-// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[VCMLA_ROT90_F16_I]], <4 x half> [[VCMLA_ROT90_F161_I]], <4 x half> [[VCMLA_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT90_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
@@ -430,25 +740,30 @@ float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_176:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_176:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_176]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_176]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_176]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[VCMLAQ_ROT90_F16_I]], <8 x half> [[VCMLAQ_ROT90_F161_I]], <8 x half> [[VCMLAQ_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT90_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
@@ -457,25 +772,30 @@ float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_180:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_180:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_180]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_180]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_180]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[VCMLAQ_ROT90_F16_I]], <8 x half> [[VCMLAQ_ROT90_F161_I]], <8 x half> [[VCMLAQ_ROT90_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT90_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
@@ -484,16 +804,25 @@ float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float1
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_206:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_206:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_206]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_206]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_206_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_206_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_206]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_206]], align 8
-// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[VCMLA_ROT90_F32_I]], <2 x float> [[VCMLA_ROT90_F321_I]], <2 x float> [[VCMLA_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT90_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
@@ -503,16 +832,24 @@ float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_210:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_210:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_210]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_210]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_210]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_210]], align 8
-// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT90_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[VCMLA_ROT90_F32_I]], <2 x float> [[VCMLA_ROT90_F321_I]], <2 x float> [[VCMLA_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT90_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT90_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT90_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
@@ -521,19 +858,27 @@ float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_208:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_208:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_208]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_208_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_208_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_208]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_208]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x i64> [[__S2_208_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[VCMLAQ_ROT90_F32_I]], <4 x float> [[VCMLAQ_ROT90_F321_I]], <4 x float> [[VCMLAQ_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT90_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
@@ -542,19 +887,26 @@ float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_212:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_212:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_212]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_212]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_212]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[VCMLAQ_ROT90_F32_I]], <4 x float> [[VCMLAQ_ROT90_F321_I]], <4 x float> [[VCMLAQ_ROT90_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT90_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT90_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT90_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
@@ -563,19 +915,26 @@ float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float3
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_158:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_158:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_158]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_158]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_158]], align 8
-// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[VCMLA_ROT180_F16_I]], <4 x half> [[VCMLA_ROT180_F161_I]], <4 x half> [[VCMLA_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT180_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
@@ -585,19 +944,26 @@ float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_162:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_162:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_162]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_162]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_162]], align 8
-// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[VCMLA_ROT180_F16_I]], <4 x half> [[VCMLA_ROT180_F161_I]], <4 x half> [[VCMLA_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT180_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
@@ -606,25 +972,30 @@ float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float1
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_160:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_160:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_160]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_160]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_160]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[VCMLAQ_ROT180_F16_I]], <8 x half> [[VCMLAQ_ROT180_F161_I]], <8 x half> [[VCMLAQ_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT180_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
@@ -633,25 +1004,30 @@ float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float1
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_164:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_164:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_164]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_164]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_164]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[VCMLAQ_ROT180_F16_I]], <8 x half> [[VCMLAQ_ROT180_F161_I]], <8 x half> [[VCMLAQ_ROT180_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT180_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
@@ -660,16 +1036,25 @@ float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_190:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_190:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_190]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_190]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_190_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_190_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_190]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_190]], align 8
-// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[VCMLA_ROT180_F32_I]], <2 x float> [[VCMLA_ROT180_F321_I]], <2 x float> [[VCMLA_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT180_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
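
The f32 lane cases here lower differently from the f16 ones above: the <2 x float> splat source is read back as <1 x i64>, and SROA apparently widens that single-element slice to a scalar i64 before rebuilding the vector, which is where the bitcast-to-i64 plus insertelement-into-undef sequence and the __S2_*_SROA_0_0_VEC_INSERT names come from. A reduced, hand-written sketch of that shape (the function name is invented):

define i64 @lane0(<2 x float> %v) {
entry:
  ; SROA reads the whole 8-byte slice back as one scalar...
  %bits = bitcast <2 x float> %v to i64
  ; ...then re-materialises the <1 x i64> the lane extract expects.
  %ins = insertelement <1 x i64> undef, i64 %bits, i32 0
  %lane = extractelement <1 x i64> %ins, i32 0
  ret i64 %lane
}
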
@@ -679,16 +1064,24 @@ float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_194:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_194:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_194]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_194]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_194]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_194]], align 8
-// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT180_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[VCMLA_ROT180_F32_I]], <2 x float> [[VCMLA_ROT180_F321_I]], <2 x float> [[VCMLA_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT180_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT180_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT180_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
@@ -697,19 +1090,27 @@ float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float3
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_192:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_192:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_192]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_192_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_192_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_192]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_192]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x i64> [[__S2_192_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[VCMLAQ_ROT180_F32_I]], <4 x float> [[VCMLAQ_ROT180_F321_I]], <4 x float> [[VCMLAQ_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT180_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
@@ -718,19 +1119,26 @@ float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float3
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_196:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_196:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_196]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_196]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_196]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[VCMLAQ_ROT180_F32_I]], <4 x float> [[VCMLAQ_ROT180_F321_I]], <4 x float> [[VCMLAQ_ROT180_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT180_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT180_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT180_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
@@ -739,19 +1147,26 @@ float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_166:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_166:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_166]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_166]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_166]], align 8
-// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[VCMLA_ROT270_F16_I]], <4 x half> [[VCMLA_ROT270_F161_I]], <4 x half> [[VCMLA_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT270_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
@@ -761,19 +1176,26 @@ float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16
// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_170:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_170:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_170]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_170]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_170]], align 8
-// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT6]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[ACC]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LHS]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F16_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F161_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F162_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[VCMLA_ROT270_F16_I]], <4 x half> [[VCMLA_ROT270_F161_I]], <4 x half> [[VCMLA_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F164_I:%.*]] = bitcast <4 x half> [[VCMLA_ROT270_F163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F164_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
@@ -782,25 +1204,30 @@ float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float1
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_168:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_168:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_168]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[RHS]] to <2 x i32>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_168]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_168]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[VCMLAQ_ROT270_F16_I]], <8 x half> [[VCMLAQ_ROT270_F161_I]], <8 x half> [[VCMLAQ_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT270_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
@@ -809,25 +1236,30 @@ float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float1
// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_172:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_172:%.*]] = alloca <4 x i32>, align 16
-// CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_172]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
-// CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16
-// CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3
-// CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_172]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_172]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]])
-// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[VGETQ_LANE10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[VGETQ_LANE10]], i32 2
+// CHECK-NEXT: [[VGETQ_LANE16:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <4 x i32> [[VECINIT12]], i32 [[VGETQ_LANE16]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT18]] to <8 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[ACC]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LHS]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F16_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F161_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F162_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[VCMLAQ_ROT270_F16_I]], <8 x half> [[VCMLAQ_ROT270_F161_I]], <8 x half> [[VCMLAQ_ROT270_F162_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F164_I:%.*]] = bitcast <8 x half> [[VCMLAQ_ROT270_F163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F164_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
@@ -836,16 +1268,25 @@ float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_198:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_198:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_198]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_198]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_198_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_198_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_198]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_198]], align 8
-// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[VCMLA_ROT270_F32_I]], <2 x float> [[VCMLA_ROT270_F321_I]], <2 x float> [[VCMLA_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT270_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
@@ -855,16 +1296,24 @@ float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32
// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_202:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_202:%.*]] = alloca <1 x i64>, align 8
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_202]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_202]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_202]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_202]], align 8
-// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
-// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[ACC]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VCMLA_ROT270_F32_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F321_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F322_I:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[VCMLA_ROT270_F32_I]], <2 x float> [[VCMLA_ROT270_F321_I]], <2 x float> [[VCMLA_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLA_ROT270_F324_I:%.*]] = bitcast <2 x float> [[VCMLA_ROT270_F323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[VCMLA_ROT270_F324_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
@@ -873,19 +1322,27 @@ float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float3
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_200:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[__REINT1_200:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_200]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8
-// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[__S2_200_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[__S2_200_SROA_0_0_VEC_INSERT]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_200]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_200]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <1 x i64> [[__S2_200_SROA_0_0_VEC_INSERT]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[VCMLAQ_ROT270_F32_I]], <4 x float> [[VCMLAQ_ROT270_F321_I]], <4 x float> [[VCMLAQ_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT270_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
@@ -894,19 +1351,26 @@ float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float3
// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[__REINT_204:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[__REINT1_204:%.*]] = alloca <2 x i64>, align 16
-// CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_204]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16
-// CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1
-// CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_204]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_204]], align 16
-// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+// CHECK-NEXT: [[VGETQ_LANE4:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE4]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT6]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ACC]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F32_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F321_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F322_I:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[VCMLAQ_ROT270_F32_I]], <4 x float> [[VCMLAQ_ROT270_F321_I]], <4 x float> [[VCMLAQ_ROT270_F322_I]])
+// CHECK-NEXT: [[VCMLAQ_ROT270_F324_I:%.*]] = bitcast <4 x float> [[VCMLAQ_ROT270_F323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[VCMLAQ_ROT270_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
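
All of the vcmla check churn above encodes the same lowering change: the reinterpret that previously round-tripped an operand through a stack slot is now a single no-op bitcast. A minimal hand-written IR sketch of the two shapes (illustrative only; the function names are invented):

define <2 x i32> @reinterpret_before(<4 x half> %v) {
entry:
  ; old pattern: spill to a stack slot and reload at the other type
  %__reint = alloca <4 x half>, align 8
  store <4 x half> %v, ptr %__reint, align 8
  %bits = load <2 x i32>, ptr %__reint, align 8
  ret <2 x i32> %bits
}

define <2 x i32> @reinterpret_after(<4 x half> %v) {
entry:
  ; new pattern: same bits, no memory traffic
  %bits = bitcast <4 x half> %v to <2 x i32>
  ret <2 x i32> %bits
}
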
diff --git a/clang/test/CodeGen/AArch64/poly-add.c b/clang/test/CodeGen/AArch64/poly-add.c
index 0795aecac433f..069df72f87deb 100644
--- a/clang/test/CodeGen/AArch64/poly-add.c
+++ b/clang/test/CodeGen/AArch64/poly-add.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
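
The only change to this test is appending sroa to the opt pipeline: mem2reg does not promote an alloca whose loads read it at a different type than it was stored with, while SROA rewrites such round-trips into the direct bitcasts the regenerated checks expect. A hypothetical standalone reduction (names invented) that exercises the updated RUN line:

; RUN: opt -S -passes=mem2reg,sroa %s | FileCheck %s
define <2 x i32> @reint(<4 x half> %v) {
entry:
  ; typed round-trip that mem2reg alone leaves in place
  %__reint = alloca <4 x half>, align 8
  store <4 x half> %v, ptr %__reint, align 8
  %bits = load <2 x i32>, ptr %__reint, align 8
; CHECK: [[TMP0:%.*]] = bitcast <4 x half> %v to <2 x i32>
; CHECK: ret <2 x i32> [[TMP0]]
  ret <2 x i32> %bits
}
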
diff --git a/clang/test/CodeGen/AArch64/poly128.c b/clang/test/CodeGen/AArch64/poly128.c
index f188632468fc8..a9df831c07cb6 100644
--- a/clang/test/CodeGen/AArch64/poly128.c
+++ b/clang/test/CodeGen/AArch64/poly128.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -passes=mem2reg \
+// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
@@ -61,7 +61,7 @@ __attribute__((target("aes"))) poly128_t test_vmull_p64(poly64_t a, poly64_t b)
}
// CHECK-LABEL: define {{[^@]+}}@test_vmull_high_p64
-// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-SAME: (<2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SHUFFLE_I5:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> <i32 1>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I5]] to i64
@@ -76,7 +76,7 @@ __attribute__((target("aes"))) poly128_t test_vmull_high_p64(poly64x2_t a, poly6
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -86,7 +86,7 @@ poly128_t test_vreinterpretq_p128_s8(int8x16_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -96,7 +96,7 @@ poly128_t test_vreinterpretq_p128_s16(int16x8_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -106,7 +106,7 @@ poly128_t test_vreinterpretq_p128_s32(int32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_s64
-// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -116,7 +116,7 @@ poly128_t test_vreinterpretq_p128_s64(int64x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -126,7 +126,7 @@ poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -136,7 +136,7 @@ poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -146,7 +146,7 @@ poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_u64
-// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -156,7 +156,7 @@ poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -166,7 +166,7 @@ poly128_t test_vreinterpretq_p128_f32(float32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_f64
-// CHECK-SAME: (<2 x double> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -176,7 +176,7 @@ poly128_t test_vreinterpretq_p128_f64(float64x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -186,7 +186,7 @@ poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -196,7 +196,7 @@ poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p128_p64
-// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (<2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to i128
// CHECK-NEXT: ret i128 [[TMP0]]
@@ -206,7 +206,7 @@ poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s8_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
@@ -216,7 +216,7 @@ int8x16_t test_vreinterpretq_s8_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s16_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
@@ -226,7 +226,7 @@ int16x8_t test_vreinterpretq_s16_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s32_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -236,7 +236,7 @@ int32x4_t test_vreinterpretq_s32_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_s64_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
@@ -246,7 +246,7 @@ int64x2_t test_vreinterpretq_s64_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u8_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
@@ -256,7 +256,7 @@ uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u16_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
@@ -266,7 +266,7 @@ uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u32_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
@@ -276,7 +276,7 @@ uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_u64_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
@@ -286,7 +286,7 @@ uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_f32_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[TMP0]]
@@ -296,7 +296,7 @@ float32x4_t test_vreinterpretq_f32_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_f64_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[TMP0]]
@@ -306,7 +306,7 @@ float64x2_t test_vreinterpretq_f64_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p8_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <16 x i8>
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
@@ -316,7 +316,7 @@ poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p16_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
@@ -326,7 +326,7 @@ poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vreinterpretq_p64_p128
-// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (i128 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[A]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
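(For reference, the CHECK lines in the rewritten test below are autogenerated. A minimal sketch of the regeneration step, assuming an LLVM checkout with a built clang at ./build/bin/clang; the build path is an assumption, not part of this patch:

    python llvm/utils/update_cc_test_checks.py --clang ./build/bin/clang \
        clang/test/CodeGen/AArch64/poly64.c

The script derives the FileCheck invocation from the test's RUN lines, so the mem2reg,sroa opt pipeline added below is what produces the simplified CHECK-NEXT bodies.)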
diff --git a/clang/test/CodeGen/AArch64/poly64.c b/clang/test/CodeGen/AArch64/poly64.c
index f3c057ecf48c1..578dd2054dc66 100644
--- a/clang/test/CodeGen/AArch64/poly64.c
+++ b/clang/test/CodeGen/AArch64/poly64.c
@@ -1,537 +1,642 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
+// RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vceq_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vceq_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[SEXT_I]]
+//
uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
return vceq_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vceqq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[SEXT_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vceqq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[SEXT_I]]
+//
uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
return vceqq_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vtst_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b
-// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
-// CHECK: ret <1 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vtst_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[VTST_I]]
+//
uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
return vtst_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vtstq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
-// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VTST_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtstq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VTST_I]]
+//
uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
return vtstq_p64(a, b);
}
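(The and / icmp ne / sext sequence checked above implements the bit-test semantics of vtst: each lane becomes all-ones when a & b is non-zero, and zero otherwise. A one-lane scalar model, illustrative only and derived from the IR shown:

    // one-lane model of the vtst lowering above
    uint64_t vtst_lane(uint64_t a, uint64_t b) {
      return (a & b) != 0 ? ~0ULL : 0ULL;  // sext(icmp ne (and a, b), 0)
    }

The leading <8 x i8> bitcast round-trips are the generic lane-type casts emitted for polynomial vectors; they are type-preserving no-ops that later passes can fold away.)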
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 {
-// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
-// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
-// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <1 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vbsl_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]], <1 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <1 x i64> [[VBSL5_I]]
+//
poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
return vbsl_p64(a, b, c);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 {
-// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
-// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1)
-// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
-// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK: ret <2 x i64> [[VBSL5_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vbslq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]], <2 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], splat (i64 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT: ret <2 x i64> [[VBSL5_I]]
+//
poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
return vbslq_p64(a, b, c);
}
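(vbsl is a bitwise select, result = (mask & b) | (~mask & c), which is exactly the and/xor/and/or chain in the checks above. A one-lane scalar model, illustrative only:

    // one-lane model of the vbsl lowering above
    uint64_t vbsl_lane(uint64_t a, uint64_t b, uint64_t c) {
      return (a & b) | (~a & c);  // bits of b where a is set, else bits of c
    }
)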
-// CHECK-LABEL: define{{.*}} i64 @test_vget_lane_p64(<1 x i64> noundef %v) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
-// CHECK: ret i64 [[VGET_LANE]]
+// CHECK-LABEL: define dso_local i64 @test_vget_lane_p64(
+// CHECK-SAME: <1 x i64> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[V]], i32 0
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
poly64_t test_vget_lane_p64(poly64x1_t v) {
return vget_lane_p64(v, 0);
}
-// CHECK-LABEL: define{{.*}} i64 @test_vgetq_lane_p64(<2 x i64> noundef %v) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
-// CHECK: ret i64 [[VGETQ_LANE]]
+// CHECK-LABEL: define dso_local i64 @test_vgetq_lane_p64(
+// CHECK-SAME: <2 x i64> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[V]], i32 1
+// CHECK-NEXT: ret i64 [[VGETQ_LANE]]
+//
poly64_t test_vgetq_lane_p64(poly64x2_t v) {
return vgetq_lane_p64(v, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_p64(i64 noundef %a, <1 x i64> noundef %v) #0 {
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
-// CHECK: ret <1 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vset_lane_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <1 x i64> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[V]], i64 [[A]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
return vset_lane_p64(a, v, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_p64(i64 noundef %a, <2 x i64> noundef %v) #0 {
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsetq_lane_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <2 x i64> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[V]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
return vsetq_lane_p64(a, v, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vcopy_lane_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
-// CHECK: ret <1 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcopy_lane_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[B]], i32 0
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[A]], i64 [[VGET_LANE]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
return vcopy_lane_p64(a, 0, b, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_lane_p64(<2 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
-// CHECK: ret <2 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcopyq_lane_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[B]], i32 0
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[A]], i64 [[VGET_LANE]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
return vcopyq_lane_p64(a, 1, b, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
-// CHECK: ret <2 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcopyq_laneq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[B]], i32 1
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[A]], i64 [[VGETQ_LANE]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
return vcopyq_laneq_p64(a, 1, b, 1);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vcreate_p64(i64 noundef %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vcreate_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
poly64x1_t test_vcreate_p64(uint64_t a) {
return vcreate_p64(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_n_p64(i64 noundef %a) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: ret <1 x i64> [[VECINIT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vdup_n_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VECINIT_I]]
+//
poly64x1_t test_vdup_n_p64(poly64_t a) {
return vdup_n_p64(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_n_p64(i64 noundef %a) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VECINIT1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vdupq_n_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VECINIT1_I]]
+//
poly64x2_t test_vdupq_n_p64(poly64_t a) {
return vdupq_n_p64(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vmov_n_p64(i64 noundef %a) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: ret <1 x i64> [[VECINIT_I]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vmov_n_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VECINIT_I]]
+//
poly64x1_t test_vmov_n_p64(poly64_t a) {
return vmov_n_p64(a);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vmovq_n_p64(i64 noundef %a) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VECINIT1_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vmovq_n_p64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VECINIT1_I]]
+//
poly64x2_t test_vmovq_n_p64(poly64_t a) {
return vmovq_n_p64(a);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_lane_p64(<1 x i64> noundef %vec) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vdup_lane_p64(
+// CHECK-SAME: <1 x i64> noundef [[VEC:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
return vdup_lane_p64(vec, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_lane_p64(<1 x i64> noundef %vec) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vdupq_lane_p64(
+// CHECK-SAME: <1 x i64> noundef [[VEC:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
return vdupq_lane_p64(vec, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_laneq_p64(<2 x i64> noundef %vec) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vdupq_laneq_p64(
+// CHECK-SAME: <2 x i64> noundef [[VEC:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
return vdupq_laneq_p64(vec, 1);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vcombine_p64(<1 x i64> noundef %low, <1 x i64> noundef %high) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vcombine_p64(
+// CHECK-SAME: <1 x i64> noundef [[LOW:%.*]], <1 x i64> noundef [[HIGH:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> [[LOW]], <1 x i64> [[HIGH]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
return vcombine_p64(low, high);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_p64(ptr noundef %ptr) #0 {
-// CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %ptr
-// CHECK: ret <1 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vld1_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[PTR]], align 8
+// CHECK-NEXT: ret <1 x i64> [[TMP0]]
+//
poly64x1_t test_vld1_p64(poly64_t const * ptr) {
return vld1_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_p64(ptr noundef %ptr) #0 {
-// CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %ptr
-// CHECK: ret <2 x i64> [[TMP2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vld1q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[PTR]], align 8
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
return vld1q_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1_p64(ptr noundef %ptr, <1 x i64> noundef %val) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: store <1 x i64> [[TMP3]], ptr %ptr
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], <1 x i64> noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VAL]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: store <1 x i64> [[TMP1]], ptr [[PTR]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
return vst1_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst1q_p64(ptr noundef %ptr, <2 x i64> noundef %val) #0 {
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: store <2 x i64> [[TMP3]], ptr %ptr
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst1q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], <2 x i64> noundef [[VAL:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[PTR]], align 8
+// CHECK-NEXT: ret void
+//
void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
return vst1q_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %ptr)
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld2_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T:%.*]] poison, <1 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
return vld2_p64(ptr);
}
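(With sroa in the pipeline, the aggregate return is now assembled with extractvalue/insertvalue directly from the ld2 intrinsic result rather than through an alloca and a memcpy. At the source level the intrinsic still just fills the two-vector struct, e.g.:

    // illustrative use of the intrinsic under test
    poly64x1x2_t pair = vld2_p64(ptr);  // pair.val[0], pair.val[1] hold the deinterleaved vectors
)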
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %ptr)
-// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld2q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T:%.*]] poison, <2 x i64> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X2_T]] [[DOTFCA_0_1_INSERT]]
+//
poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
return vld2q_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %ptr)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld3_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T:%.*]] poison, <1 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
return vld3_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %ptr)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld3q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T:%.*]] poison, <2 x i64> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X3_T]] [[DOTFCA_0_2_INSERT]]
+//
poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
return vld3q_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %ptr)
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld4_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T:%.*]] poison, <1 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_0_INSERT]], <1 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_1_INSERT]], <1 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_2_INSERT]], <1 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X1X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
return vld4_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_p64(ptr noundef %ptr) #0 {
-// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %ptr)
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
-// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
+// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld4q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T:%.*]] poison, <2 x i64> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_0_INSERT]], <2 x i64> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_1_INSERT]], <2 x i64> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_2_INSERT]], <2 x i64> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_POLY64X2X4_T]] [[DOTFCA_0_3_INSERT]]
+//
poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
return vld4q_p64(ptr);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2_p64(ptr noundef %ptr, [2 x <1 x i64>] alignstack(8) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [2 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 16, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [2 x <1 x i64>] alignstack(8) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
return vst2_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst2q_p64(ptr noundef %ptr, [2 x <2 x i64>] alignstack(16) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [2 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 32, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst2q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [2 x <2 x i64>] alignstack(16) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
return vst2q_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3_p64(ptr noundef %ptr, [3 x <1 x i64>] alignstack(8) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [3 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 24, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [3 x <1 x i64>] alignstack(8) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[VAL_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
return vst3_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst3q_p64(ptr noundef %ptr, [3 x <2 x i64>] alignstack(16) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [3 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 48, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst3q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [3 x <2 x i64>] alignstack(16) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[VAL_COERCE]], 2
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
return vst3q_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4_p64(ptr noundef %ptr, [4 x <1 x i64>] alignstack(8) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [4 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 32, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL6]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX7]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [4 x <1 x i64>] alignstack(8) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[VAL_COERCE]], 2
+// CHECK-NEXT: [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[VAL_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[VAL_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
return vst4_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} void @test_vst4q_p64(ptr noundef %ptr, [4 x <2 x i64>] alignstack(16) %val.coerce) #0 {
-// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[VAL]], i32 0, i32 0
-// CHECK: store [4 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 64, i1 false)
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL6]], i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX7]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %ptr)
-// CHECK: ret void
+// CHECK-LABEL: define dso_local void @test_vst4q_p64(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], [4 x <2 x i64>] alignstack(16) [[VAL_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VAL_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[VAL_COERCE]], 0
+// CHECK-NEXT: [[VAL_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[VAL_COERCE]], 1
+// CHECK-NEXT: [[VAL_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[VAL_COERCE]], 2
+// CHECK-NEXT: [[VAL_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[VAL_COERCE]], 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[VAL_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[PTR]])
+// CHECK-NEXT: ret void
+//
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
return vst4q_p64(ptr, val);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vext_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VEXT]]
+//
poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
return vext_u64(a, b, 0);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i64> [[VEXT]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vextq_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[VEXT]]
+//
poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
return vextq_p64(a, b, 1);
}
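(vextq_p64(a, b, 1) takes a two-element window out of the concatenation a:b, so the <i32 1, i32 2> shuffle mask above selects a[1] followed by b[0]. A two-lane model, illustrative only:

    // two-lane model of the vextq lowering above
    void vextq_model(const uint64_t a[2], const uint64_t b[2], uint64_t r[2]) {
      r[0] = a[1];  // concat index 1
      r[1] = b[0];  // concat index 2
    }
)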
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip1q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
return vzip1q_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vzip2q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
return vzip2q_u64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp1q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
return vuzp1q_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vuzp2q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
return vuzp2q_u64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn1q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
return vtrn1q_p64(a, b);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vtrn2q_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
return vtrn2q_u64(a, b);
}
-// CHECK-LABEL: define{{.*}} <1 x i64> @test_vsri_n_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
-// CHECK: ret <1 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <1 x i64> @test_vsri_n_p64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
+// CHECK-NEXT: ret <1 x i64> [[VSRI_N2]]
+//
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
return vsri_n_p64(a, b, 33);
}
-// CHECK-LABEL: define{{.*}} <2 x i64> @test_vsriq_n_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
-// CHECK: ret <2 x i64> [[VSRI_N2]]
+// CHECK-LABEL: define dso_local <2 x i64> @test_vsriq_n_p64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
+// CHECK-NEXT: ret <2 x i64> [[VSRI_N2]]
+//
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
return vsriq_n_p64(a, b, 64);
}
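
(Usage sketch, not part of the patch: the vsri tests above exercise the
shift-right-and-insert intrinsics. The helper name below is illustrative;
it assumes an AArch64 toolchain with NEON enabled, and note that some
toolchains also gate the poly64 types behind +aes.)

    #include <arm_neon.h>

    poly64x2_t merge_high_bits(poly64x2_t acc, poly64x2_t src) {
      /* Per lane: shift src right by 1 and insert into acc, keeping
         acc's top bit; valid shifts for 64-bit lanes are 1..64. */
      return vsriq_n_p64(acc, src, 1);
    }
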
diff --git a/clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c
index bc985efa6bc99..3317db1bf5af6 100644
--- a/clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.1a-neon-intrinsics.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
-// RUN: -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,dce -S | FileCheck %s
+// RUN: -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,sroa,dce -S | FileCheck %s
// REQUIRES: aarch64-registered-target
@@ -11,8 +11,16 @@
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
-// CHECK-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMLAH_S16_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLAH_S161_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLAH_S162_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLAH_S163_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[VQRDMLAH_S16_I]], <4 x i16> [[VQRDMLAH_S161_I]], <4 x i16> [[VQRDMLAH_S162_I]])
+// CHECK-NEXT: [[VQRDMLAH_S164_I:%.*]] = bitcast <4 x i16> [[VQRDMLAH_S163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[VQRDMLAH_S164_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP5]]
//
int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
return vqrdmlah_laneq_s16(a, b, v, 7);
@@ -23,8 +31,16 @@ int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
-// CHECK-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMLAH_S32_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLAH_S321_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLAH_S322_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLAH_S323_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[VQRDMLAH_S32_I]], <2 x i32> [[VQRDMLAH_S321_I]], <2 x i32> [[VQRDMLAH_S322_I]])
+// CHECK-NEXT: [[VQRDMLAH_S324_I:%.*]] = bitcast <2 x i32> [[VQRDMLAH_S323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[VQRDMLAH_S324_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP5]]
//
int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
return vqrdmlah_laneq_s32(a, b, v, 3);
@@ -35,8 +51,16 @@ int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
-// CHECK-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMLAHQ_S16_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLAHQ_S161_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLAHQ_S162_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLAHQ_S163_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[VQRDMLAHQ_S16_I]], <8 x i16> [[VQRDMLAHQ_S161_I]], <8 x i16> [[VQRDMLAHQ_S162_I]])
+// CHECK-NEXT: [[VQRDMLAHQ_S164_I:%.*]] = bitcast <8 x i16> [[VQRDMLAHQ_S163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[VQRDMLAHQ_S164_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
return vqrdmlahq_laneq_s16(a, b, v, 7);
@@ -47,8 +71,16 @@ int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
-// CHECK-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMLAHQ_S32_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLAHQ_S321_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLAHQ_S322_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLAHQ_S323_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[VQRDMLAHQ_S32_I]], <4 x i32> [[VQRDMLAHQ_S321_I]], <4 x i32> [[VQRDMLAHQ_S322_I]])
+// CHECK-NEXT: [[VQRDMLAHQ_S324_I:%.*]] = bitcast <4 x i32> [[VQRDMLAHQ_S323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[VQRDMLAHQ_S324_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
return vqrdmlahq_laneq_s32(a, b, v, 3);
@@ -129,8 +161,16 @@ int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]])
-// CHECK-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMLSH_S16_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLSH_S161_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLSH_S162_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMLSH_S163_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[VQRDMLSH_S16_I]], <4 x i16> [[VQRDMLSH_S161_I]], <4 x i16> [[VQRDMLSH_S162_I]])
+// CHECK-NEXT: [[VQRDMLSH_S164_I:%.*]] = bitcast <4 x i16> [[VQRDMLSH_S163_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[VQRDMLSH_S164_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP5]]
//
int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
return vqrdmlsh_laneq_s16(a, b, v, 7);
@@ -141,8 +181,16 @@ int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]])
-// CHECK-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMLSH_S32_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLSH_S321_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLSH_S322_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMLSH_S323_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[VQRDMLSH_S32_I]], <2 x i32> [[VQRDMLSH_S321_I]], <2 x i32> [[VQRDMLSH_S322_I]])
+// CHECK-NEXT: [[VQRDMLSH_S324_I:%.*]] = bitcast <2 x i32> [[VQRDMLSH_S323_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[VQRDMLSH_S324_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP5]]
//
int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
return vqrdmlsh_laneq_s32(a, b, v, 3);
@@ -153,8 +201,16 @@ int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]])
-// CHECK-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMLSHQ_S16_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLSHQ_S161_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLSHQ_S162_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMLSHQ_S163_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[VQRDMLSHQ_S16_I]], <8 x i16> [[VQRDMLSHQ_S161_I]], <8 x i16> [[VQRDMLSHQ_S162_I]])
+// CHECK-NEXT: [[VQRDMLSHQ_S164_I:%.*]] = bitcast <8 x i16> [[VQRDMLSHQ_S163_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[VQRDMLSHQ_S164_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
return vqrdmlshq_laneq_s16(a, b, v, 7);
@@ -165,8 +221,16 @@ int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]])
-// CHECK-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMLSHQ_S32_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLSHQ_S321_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLSHQ_S322_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMLSHQ_S323_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[VQRDMLSHQ_S32_I]], <4 x i32> [[VQRDMLSHQ_S321_I]], <4 x i32> [[VQRDMLSHQ_S322_I]])
+// CHECK-NEXT: [[VQRDMLSHQ_S324_I:%.*]] = bitcast <4 x i32> [[VQRDMLSHQ_S323_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[VQRDMLSHQ_S324_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
return vqrdmlshq_laneq_s32(a, b, v, 3);
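
(Usage sketch, not part of the patch: the sqrdmlah/sqrdmlsh checks above
come from the ARMv8.1-A RDM extension; the helper below is illustrative
and assumes compilation with e.g. -march=armv8.1-a so the intrinsic is
available.)

    #include <arm_neon.h>

    int16x4_t mla_by_lane7(int16x4_t acc, int16x4_t x, int16x8_t t) {
      /* Per lane: saturate(acc[i] + ((2*x[i]*t[7] + (1 << 15)) >> 16)),
         i.e. a rounding, doubling, saturating multiply-accumulate. */
      return vqrdmlah_laneq_s16(acc, x, t, 7);
    }
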
diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.c
index b51e6f7e6e1ac..02ddbf2950829 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.c
@@ -1,21 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
+// RUN: | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED --implicit-check-not=fpexcept.maytrap %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
-// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
-// RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
-// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM --implicit-check-not=fpexcept.maytrap %s
+// RUN: | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck --check-prefix=CONSTRAINED --implicit-check-not=fpexcept.maytrap %s
// REQUIRES: aarch64-registered-target
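
(Sketch, not part of the patch: both prefixes compile the same C source;
the CONSTRAINED run adds -ffp-exception-behavior=maytrap, which makes
clang emit llvm.experimental.constrained.* calls instead of the plain
intrinsics, as the check lines below show. Illustrative example:)

    #include <arm_neon.h>

    float16x4_t fused_madd(float16x4_t a, float16x4_t b, float16x4_t c) {
      /* Per lane: a[i] + b[i]*c[i], fused; lowered to llvm.fma or, under
         maytrap, to llvm.experimental.constrained.fma. */
      return vfma_f16(a, b, c);
    }
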
@@ -29,310 +21,754 @@
#include <arm_neon.h>
-// COMMON-LABEL: test_vsqrt_f16
-// UNCONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
-// CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-// COMMONIR: ret <4 x half> [[SQR]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vsqrt_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[TMP2]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[VSQRT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vsqrt_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> [[TMP2]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2:[0-9]+]]
+// CONSTRAINED-NEXT: ret <4 x half> [[VSQRT_I]]
+//
float16x4_t test_vsqrt_f16(float16x4_t a) {
return vsqrt_f16(a);
}
-// COMMON-LABEL: test_vsqrtq_f16
-// UNCONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
-// CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-// COMMONIR: ret <8 x half> [[SQR]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vsqrtq_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[TMP2]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[VSQRT_I]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vsqrtq_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CONSTRAINED-NEXT: [[VSQRT_I:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> [[TMP2]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[VSQRT_I]]
+//
float16x8_t test_vsqrtq_f16(float16x8_t a) {
return vsqrtq_f16(a);
}
-// COMMON-LABEL: test_vfma_f16
-// UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
-// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-// COMMONIR: ret <4 x half> [[ADD]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfma_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmaq_f16
-// UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
-// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-// COMMONIR: ret <8 x half> [[ADD]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmaq_f16(a, b, c);
}
-// COMMON-LABEL: test_vfms_f16
-// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
-// UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
-// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-// COMMONIR: ret <4 x half> [[ADD]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <4 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <4 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfms_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmsq_f16
-// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
-// UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
-// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-// COMMONIR: ret <8 x half> [[ADD]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmsq_f16(a, b, c);
}
-// COMMON-LABEL: test_vfma_lane_f16
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
-// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_lane_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_lane_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[FMLA2]]
+//
float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfma_lane_f16(a, b, c, 3);
}
-// COMMON-LABEL: test_vfmaq_lane_f16
-// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
-// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_lane_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_lane_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[FMLA2]]
+//
float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
return vfmaq_lane_f16(a, b, c, 3);
}
-// COMMON-LABEL: test_vfma_laneq_f16
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
-// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_laneq_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_laneq_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
return vfma_laneq_f16(a, b, c, 7);
}
-// COMMON-LABEL: test_vfmaq_laneq_f16
-// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
-// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_laneq_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_laneq_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmaq_laneq_f16(a, b, c, 7);
}
-// COMMON-LABEL: test_vfma_n_f16
-// COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
-// COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
-// COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
-// COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
-// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
-// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_n_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[C]], i32 0
+// UNCONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
+// UNCONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfma_n_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[C]], i32 0
+// CONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
+// CONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
+// CONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
return vfma_n_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmaq_n_f16
-// COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
-// COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
-// COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
-// COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
-// COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
-// COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
-// COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
-// COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
-// UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
-// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_n_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[C]], i32 0
+// UNCONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
+// UNCONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
+// UNCONSTRAINED-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
+// UNCONSTRAINED-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
+// UNCONSTRAINED-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmaq_n_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[C]], i32 0
+// CONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
+// CONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
+// CONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
+// CONSTRAINED-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
+// CONSTRAINED-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
+// CONSTRAINED-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
+// CONSTRAINED-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
return vfmaq_n_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmah_lane_f16
-// COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
-// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
-// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret half [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local half @test_vfmah_lane_f16(
+// UNCONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <4 x half> [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.fma.f16(half [[B]], half [[EXTRACT]], half [[A]])
+// UNCONSTRAINED-NEXT: ret half [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local half @test_vfmah_lane_f16(
+// CONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <4 x half> [[C]], i32 3
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[B]], half [[EXTRACT]], half [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret half [[TMP0]]
+//
float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
return vfmah_lane_f16(a, b, c, 3);
}
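
The scalar lane forms reduce to an extractelement plus llvm.fma, as the checks above show. A rough model in ACLE terms, assuming vget_lane_f16; note the real intrinsic guarantees a single rounding, which this unfused expression does not.

  #include <arm_neon.h>
  // Hypothetical model of vfmah_lane_f16(a, b, c, 3); may differ from the
  // intrinsic in the last bit because a + b * x is not guaranteed fused.
  float16_t vfmah_lane_f16_model(float16_t a, float16_t b, float16x4_t c) {
    float16_t x = vget_lane_f16(c, 3);  // matches: extractelement <4 x half> c, i32 3
    return a + b * x;
  }
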
-// COMMON-LABEL: test_vfmah_laneq_f16
-// COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
-// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
-// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret half [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local half @test_vfmah_laneq_f16(
+// UNCONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <8 x half> [[C]], i32 7
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.fma.f16(half [[B]], half [[EXTRACT]], half [[A]])
+// UNCONSTRAINED-NEXT: ret half [[TMP0]]
+//
+// CONSTRAINED-LABEL: define dso_local half @test_vfmah_laneq_f16(
+// CONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <8 x half> [[C]], i32 7
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[B]], half [[EXTRACT]], half [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret half [[TMP0]]
+//
float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
return vfmah_laneq_f16(a, b, c, 7);
}
-// COMMON-LABEL: test_vfms_lane_f16
-// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
-// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_lane_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_lane_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[FMLA2]]
+//
float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfms_lane_f16(a, b, c, 3);
}
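
As the FNEG line in both check prefixes shows, the fms forms negate the multiplicand and reuse the fma path. A sketch of that lowering in ACLE terms, assuming vneg_f16 and vfma_lane_f16; the helper is illustrative only.

  #include <arm_neon.h>
  // Hypothetical: vfms_lane_f16(a, b, c, 3) expressed as fneg(b) feeding an fma.
  float16x4_t vfms_lane_f16_model(float16x4_t a, float16x4_t b, float16x4_t c) {
    return vfma_lane_f16(a, vneg_f16(b), c, 3);
  }
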
-// COMMON-LABEL: test_vfmsq_lane_f16
-// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
-// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
-// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_lane_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// UNCONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[FMLA2]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_lane_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CONSTRAINED-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[FMLA2]]
+//
float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
return vfmsq_lane_f16(a, b, c, 3);
}
-// COMMON-LABEL: test_vfms_laneq_f16
-// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
-// CHECK-ASM-NOT: fneg
-// COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
-// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_laneq_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_laneq_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
return vfms_laneq_f16(a, b, c, 7);
}
-// COMMON-LABEL: test_vfmsq_laneq_f16
-// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
-// CHECK-ASM-NOT: fneg
-// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
-// COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
-// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMLA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_laneq_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_laneq_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmsq_laneq_f16(a, b, c, 7);
}
-// COMMON-LABEL: test_vfms_n_f16
-// COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
-// COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
-// COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
-// COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
-// COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
-// UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
-// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <4 x half> [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_n_f16(
+// UNCONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[C]], i32 0
+// UNCONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
+// UNCONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <4 x half> @test_vfms_n_f16(
+// CONSTRAINED-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
+// CONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[C]], i32 0
+// CONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
+// CONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
+// CONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <4 x half> [[TMP9]]
+//
float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
return vfms_n_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmsq_n_f16
-// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
-// COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
-// COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
-// COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
-// COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
-// COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
-// COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
-// COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
-// COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
-// UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
-// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret <8 x half> [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_n_f16(
+// UNCONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// UNCONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[C]], i32 0
+// UNCONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
+// UNCONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
+// UNCONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
+// UNCONSTRAINED-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
+// UNCONSTRAINED-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
+// UNCONSTRAINED-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// UNCONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// UNCONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// UNCONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
+// CONSTRAINED-LABEL: define dso_local <8 x half> @test_vfmsq_n_f16(
+// CONSTRAINED-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
+// CONSTRAINED-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[C]], i32 0
+// CONSTRAINED-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1
+// CONSTRAINED-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2
+// CONSTRAINED-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3
+// CONSTRAINED-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4
+// CONSTRAINED-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
+// CONSTRAINED-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
+// CONSTRAINED-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CONSTRAINED-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CONSTRAINED-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret <8 x half> [[TMP9]]
+//
float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
return vfmsq_n_f16(a, b, c);
}
-// COMMON-LABEL: test_vfmsh_lane_f16
-// UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
-// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
-// COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
-// CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
-// UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
-// CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
-// COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
-// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
-// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret half [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local half @test_vfmsh_lane_f16(
+// UNCONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CONV:%.*]] = fpext half [[B]] to float
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[CONV]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fptrunc float [[FNEG]] to half
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <4 x half> [[C]], i32 3
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = call half @llvm.fma.f16(half [[TMP0]], half [[EXTRACT]], half [[A]])
+// UNCONSTRAINED-NEXT: ret half [[TMP1]]
+//
+// CONSTRAINED-LABEL: define dso_local half @test_vfmsh_lane_f16(
+// CONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CONV:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[B]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[CONV]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[FNEG]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <4 x half> [[C]], i32 3
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[TMP0]], half [[EXTRACT]], half [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret half [[TMP1]]
+//
float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
return vfmsh_lane_f16(a, b, c, 3);
}
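
The vfmsh checks negate the half operand in float (fpext, fneg, fptrunc) rather than directly. A sketch of why this round-trip preserves the value for numeric inputs, assuming IEEE binary16/binary32: widening is exact, and narrowing a value that started life as a half is exact, so the result equals a direct sign flip.

  #include <arm_neon.h>
  // Hypothetical illustration: equals -b for every numeric half value.
  float16_t neg_via_float(float16_t b) {
    return (float16_t)(-(float)b);
  }
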
-// COMMON-LABEL: test_vfmsh_laneq_f16
-// UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
-// CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
-// COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
-// CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
-// UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
-// CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
-// COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
-// UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
-// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
-// COMMONIR: ret half [[FMA]]
+// UNCONSTRAINED-LABEL: define dso_local half @test_vfmsh_laneq_f16(
+// UNCONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[CONV:%.*]] = fpext half [[B]] to float
+// UNCONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[CONV]]
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = fptrunc float [[FNEG]] to half
+// UNCONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <8 x half> [[C]], i32 7
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = call half @llvm.fma.f16(half [[TMP0]], half [[EXTRACT]], half [[A]])
+// UNCONSTRAINED-NEXT: ret half [[TMP1]]
+//
+// CONSTRAINED-LABEL: define dso_local half @test_vfmsh_laneq_f16(
+// CONSTRAINED-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[CONV:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[B]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[FNEG:%.*]] = fneg float [[CONV]]
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[FNEG]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: [[EXTRACT:%.*]] = extractelement <8 x half> [[C]], i32 7
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[TMP0]], half [[EXTRACT]], half [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]]
+// CONSTRAINED-NEXT: ret half [[TMP1]]
+//
float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
return vfmsh_laneq_f16(a, b, c, 7);
}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
index 4d2ef318005bd..8c719178d7241 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
@@ -1,11 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature -fullfp16 -target-feature +v8a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
@@ -15,17 +15,20 @@
// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16
// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP5]], [[VBSL2_I]]
// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP6]]
//
float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
return vbsl_f16(a, b, c);
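
The rewritten vbsl checks make the select explicit: (mask & b) | (~mask & c) on the integer bit pattern, with the half vectors bitcast in and out. A minimal integer-domain model, assuming ACLE vand/vbic/vorr, where vbic_u16(x, m) computes x & ~m; the helper name is hypothetical.

  #include <arm_neon.h>
  // Hypothetical model of the and/xor/or sequence in the checks above.
  uint16x4_t bsl_u16_model(uint16x4_t m, uint16x4_t b, uint16x4_t c) {
    return vorr_u16(vand_u16(m, b), vbic_u16(c, m));
  }
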
@@ -34,17 +37,20 @@ float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16
// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+// CHECK-NEXT: [[TMP5:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
+// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP5]], [[VBSL2_I]]
// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP6]]
//
float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
return vbslq_f16(a, b, c);
@@ -53,21 +59,22 @@ float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T:%.*]] poison, <4 x half> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x half> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] poison, <4 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
return vzip_f16(a, b);
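
The vzipq/vuzp/vtrn hunks that follow have the same shape; the essential content is the pair of shufflevector masks. A sketch of the vzip case using clang's __builtin_shufflevector (assumed available, as in the generated IR); the helper is illustrative only.

  #include <arm_neon.h>
  // Hypothetical model of vzip_f16: interleave low lanes, then high lanes.
  float16x4x2_t vzip_f16_model(float16x4_t a, float16x4_t b) {
    float16x4x2_t r;
    r.val[0] = __builtin_shufflevector(a, b, 0, 4, 1, 5);  // VZIP_I mask
    r.val[1] = __builtin_shufflevector(a, b, 2, 6, 3, 7);  // VZIP1_I mask
    return r;
  }
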
@@ -76,21 +83,22 @@ float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] poison, <8 x half> [[VZIP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT3]], <8 x half> [[VZIP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
return vzipq_f16(a, b);
@@ -99,21 +107,22 @@ float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T:%.*]] poison, <4 x half> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x half> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] poison, <4 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
return vuzp_f16(a, b);
@@ -122,21 +131,22 @@ float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] poison, <8 x half> [[VUZP_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT3]], <8 x half> [[VUZP1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
return vuzpq_f16(a, b);
@@ -145,21 +155,22 @@ float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T:%.*]] poison, <4 x half> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x half> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] poison, <4 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
return vtrn_f16(a, b);
@@ -168,21 +179,22 @@ float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
-// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
-// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
-// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] poison, <8 x half> [[VTRN_I]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT3]], <8 x half> [[VTRN1_I]], 0, 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT4]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP6]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
//
float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
return vtrnq_f16(a, b);
@@ -251,9 +263,10 @@ float16x8_t test_vdupq_n_f16(float16_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: ret <4 x half> [[LANE]]
//
float16x4_t test_vdup_lane_f16(float16x4_t a) {
@@ -263,9 +276,10 @@ float16x4_t test_vdup_lane_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: ret <8 x half> [[LANE]]
//
float16x8_t test_vdupq_lane_f16(float16x4_t a) {
@@ -275,9 +289,10 @@ float16x8_t test_vdupq_lane_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: ret <4 x half> [[LANE]]
//
float16x4_t test_vdup_laneq_f16(float16x8_t a) {
@@ -287,9 +302,10 @@ float16x4_t test_vdup_laneq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: ret <8 x half> [[LANE]]
//
float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
@@ -299,11 +315,13 @@ float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vext_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
// CHECK-NEXT: ret <4 x half> [[VEXT]]
//
float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
@@ -313,11 +331,13 @@ float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
// CHECK-NEXT: ret <8 x half> [[VEXT]]
//
float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
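
For readers scanning these hunks: the new checks all encode the same value-preserving round trip. Each half-precision operand is first bitcast to the matching integer element type (<4 x half> to <4 x i16>, or <8 x half> to <8 x i16>), then to the byte vector the builtin signature expects, then back to half before the real operation. A minimal sketch of what that means at the source level (not part of the patch; assumes <arm_neon.h> and a target with +fullfp16, and the wrapper name is mine):

  #include <arm_neon.h>

  // vext_f16 is one of the intrinsics whose checks changed above. Under this
  // patch clang emits <4 x half> -> <4 x i16> -> <8 x i8> -> <4 x half>
  // bitcasts on each operand before the shufflevector; the casts are no-ops
  // on the bit pattern and fold away once the optimizer runs.
  float16x4_t ext_example(float16x4_t a, float16x4_t b) {
    return vext_f16(a, b, 2); // corresponds to the <i32 2, 3, 4, 5> mask above
  }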
diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
index 1cce977b60e6b..9c408e8c702fd 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
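
Adding sroa after mem2reg in the RUN pipeline is what removes the alloca/load/store scaffolding from the expected IR in the hunks below, leaving only the bitcast chains and the intrinsic calls. The checks themselves are regenerated rather than hand-edited; a hypothetical invocation (the build/bin/clang path is an assumption, and the script picks up the UTC_ARGS recorded in the NOTE line above):

  python3 llvm/utils/update_cc_test_checks.py --clang build/bin/clang \
      clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c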
@@ -11,8 +11,10 @@
// CHECK-LABEL: define {{[^@]+}}@test_vabs_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x half> @llvm.fabs.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x half> @llvm.fabs.v4f16(<4 x half> [[VABS_I]])
// CHECK-NEXT: ret <4 x half> [[VABS1_I]]
//
float16x4_t test_vabs_f16(float16x4_t a) {
@@ -22,8 +24,10 @@ float16x4_t test_vabs_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vabsq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VABS1_I:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[VABS_I]])
// CHECK-NEXT: ret <8 x half> [[VABS1_I]]
//
float16x8_t test_vabsq_f16(float16x8_t a) {
@@ -33,9 +37,11 @@ float16x8_t test_vabsq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vceqz_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <4 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
//
uint16x4_t test_vceqz_f16(float16x4_t a) {
@@ -45,9 +51,11 @@ uint16x4_t test_vceqz_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vceqzq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <8 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <8 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
//
uint16x8_t test_vceqzq_f16(float16x8_t a) {
@@ -57,9 +65,11 @@ uint16x8_t test_vceqzq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcgez_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp oge <4 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <4 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[VCGEZ_I]]
//
uint16x4_t test_vcgez_f16(float16x4_t a) {
@@ -69,9 +79,11 @@ uint16x4_t test_vcgez_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcgezq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp oge <8 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <8 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[VCGEZ_I]]
//
uint16x8_t test_vcgezq_f16(float16x8_t a) {
@@ -81,9 +93,11 @@ uint16x8_t test_vcgezq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcgtz_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[VCGTZ_I]]
//
uint16x4_t test_vcgtz_f16(float16x4_t a) {
@@ -93,9 +107,11 @@ uint16x4_t test_vcgtz_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcgtzq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <8 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[VCGTZ_I]]
//
uint16x8_t test_vcgtzq_f16(float16x8_t a) {
@@ -105,9 +121,11 @@ uint16x8_t test_vcgtzq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vclez_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp ole <4 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <4 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[VCLEZ_I]]
//
uint16x4_t test_vclez_f16(float16x4_t a) {
@@ -117,9 +135,11 @@ uint16x4_t test_vclez_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vclezq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp ole <8 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp ole <8 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[VCLEZ_I]]
//
uint16x8_t test_vclezq_f16(float16x8_t a) {
@@ -129,9 +149,11 @@ uint16x8_t test_vclezq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcltz_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <4 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK-NEXT: ret <4 x i16> [[VCLTZ_I]]
//
uint16x4_t test_vcltz_f16(float16x4_t a) {
@@ -141,9 +163,11 @@ uint16x4_t test_vcltz_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcltzq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <8 x half> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <8 x half> [[TMP2]], zeroinitializer
+// CHECK-NEXT: [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK-NEXT: ret <8 x i16> [[VCLTZ_I]]
//
uint16x8_t test_vcltzq_f16(float16x8_t a) {
@@ -154,7 +178,8 @@ uint16x8_t test_vcltzq_f16(float16x8_t a) {
// CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x half>
// CHECK-NEXT: ret <4 x half> [[VCVT_I]]
//
float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
@@ -165,7 +190,8 @@ float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x half>
// CHECK-NEXT: ret <8 x half> [[VCVT_I]]
//
float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
@@ -176,7 +202,8 @@ float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
// CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x half>
// CHECK-NEXT: ret <4 x half> [[VCVT_I]]
//
float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
@@ -187,7 +214,8 @@ float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x half>
// CHECK-NEXT: ret <8 x half> [[VCVT_I]]
//
float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
@@ -197,8 +225,10 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzs.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzs.v4i16.v4f16(<4 x half> [[VCVTZ_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTZ1_I]]
//
int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
@@ -208,8 +238,10 @@ int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> [[VCVTZ_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTZ1_I]]
//
int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
@@ -219,8 +251,10 @@ int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzu.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzu.v4i16.v4f16(<4 x half> [[VCVTZ_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTZ1_I]]
//
uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
@@ -230,8 +264,10 @@ uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTZ1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> [[VCVTZ_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTZ1_I]]
//
uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
@@ -241,8 +277,10 @@ uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvta_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtas.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtas.v4i16.v4f16(<4 x half> [[VCVTA_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTA1_I]]
//
int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
@@ -252,8 +290,10 @@ int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvta_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> [[VCVTA_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTA1_I]]
//
uint16x4_t test_vcvta_u16_f16 (float16x4_t a) {
@@ -263,8 +303,10 @@ uint16x4_t test_vcvta_u16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtaq_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTA1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> [[VCVTA_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTA1_I]]
//
int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
@@ -274,8 +316,10 @@ int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtm_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtms.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtms.v4i16.v4f16(<4 x half> [[VCVTM_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTM1_I]]
//
int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
@@ -285,8 +329,10 @@ int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtmq_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtms.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtms.v8i16.v8f16(<8 x half> [[VCVTM_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTM1_I]]
//
int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
@@ -296,8 +342,10 @@ int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtm_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtmu.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtmu.v4i16.v4f16(<4 x half> [[VCVTM_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTM1_I]]
//
uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
@@ -307,8 +355,10 @@ uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtmq_u16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtmu.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTM1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtmu.v8i16.v8f16(<8 x half> [[VCVTM_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTM1_I]]
//
uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
@@ -318,8 +368,10 @@ uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtn_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtns.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtns.v4i16.v4f16(<4 x half> [[VCVTN_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTN1_I]]
//
int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
@@ -329,8 +381,10 @@ int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtnq_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtns.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtns.v8i16.v8f16(<8 x half> [[VCVTN_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTN1_I]]
//
int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
@@ -340,8 +394,10 @@ int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtn_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtnu.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtnu.v4i16.v4f16(<4 x half> [[VCVTN_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTN1_I]]
//
uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
@@ -351,8 +407,10 @@ uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtnq_u16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtnu.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTN1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtnu.v8i16.v8f16(<8 x half> [[VCVTN_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTN1_I]]
//
uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
@@ -362,8 +420,10 @@ uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtp_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtps.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtps.v4i16.v4f16(<4 x half> [[VCVTP_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTP1_I]]
//
int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
@@ -373,8 +433,10 @@ int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtpq_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtps.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtps.v8i16.v8f16(<8 x half> [[VCVTP_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTP1_I]]
//
int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
@@ -384,8 +446,10 @@ int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtp_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtpu.v4i16.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtpu.v4i16.v4f16(<4 x half> [[VCVTP_I]])
// CHECK-NEXT: ret <4 x i16> [[VCVTP1_I]]
//
uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
@@ -395,8 +459,10 @@ uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtpq_u16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtpu.v8i16.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTP1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtpu.v8i16.v8f16(<8 x half> [[VCVTP_I]])
// CHECK-NEXT: ret <8 x i16> [[VCVTP1_I]]
//
uint16x8_t test_vcvtpq_u16_f16 (float16x8_t a) {
@@ -427,8 +493,10 @@ float16x8_t test_vnegq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrecpe_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frecpe.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frecpe.v4f16(<4 x half> [[VRECPE_V_I]])
// CHECK-NEXT: ret <4 x half> [[VRECPE_V1_I]]
//
float16x4_t test_vrecpe_f16(float16x4_t a) {
@@ -438,8 +506,10 @@ float16x4_t test_vrecpe_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrecpeq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frecpe.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frecpe.v8f16(<8 x half> [[VRECPEQ_V_I]])
// CHECK-NEXT: ret <8 x half> [[VRECPEQ_V1_I]]
//
float16x8_t test_vrecpeq_f16(float16x8_t a) {
@@ -449,8 +519,10 @@ float16x8_t test_vrecpeq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrnd_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <4 x half> @llvm.trunc.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <4 x half> @llvm.trunc.v4f16(<4 x half> [[VRNDZ_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDZ1_I]]
//
float16x4_t test_vrnd_f16(float16x4_t a) {
@@ -460,8 +532,10 @@ float16x4_t test_vrnd_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[VRNDZ_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDZ1_I]]
//
float16x8_t test_vrndq_f16(float16x8_t a) {
@@ -471,8 +545,10 @@ float16x8_t test_vrndq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrnda_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <4 x half> @llvm.round.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <4 x half> @llvm.round.v4f16(<4 x half> [[VRNDA_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDA1_I]]
//
float16x4_t test_vrnda_f16(float16x4_t a) {
@@ -482,8 +558,10 @@ float16x4_t test_vrnda_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndaq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[VRNDA_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDA1_I]]
//
float16x8_t test_vrndaq_f16(float16x8_t a) {
@@ -493,8 +571,10 @@ float16x8_t test_vrndaq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndi_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDI_V1_I:%.*]] = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDI_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDI_V1_I:%.*]] = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> [[VRNDI_V_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDI_V1_I]]
//
float16x4_t test_vrndi_f16(float16x4_t a) {
@@ -504,8 +584,10 @@ float16x4_t test_vrndi_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndiq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDIQ_V1_I:%.*]] = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDIQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDIQ_V1_I:%.*]] = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> [[VRNDIQ_V_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDIQ_V1_I]]
//
float16x8_t test_vrndiq_f16(float16x8_t a) {
@@ -515,8 +597,10 @@ float16x8_t test_vrndiq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndm_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <4 x half> @llvm.floor.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <4 x half> @llvm.floor.v4f16(<4 x half> [[VRNDM_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDM1_I]]
//
float16x4_t test_vrndm_f16(float16x4_t a) {
@@ -526,8 +610,10 @@ float16x4_t test_vrndm_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndmq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[VRNDM_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDM1_I]]
//
float16x8_t test_vrndmq_f16(float16x8_t a) {
@@ -537,8 +623,10 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndn_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> [[VRNDN_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDN1_I]]
//
float16x4_t test_vrndn_f16(float16x4_t a) {
@@ -548,8 +636,10 @@ float16x4_t test_vrndn_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndnq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[VRNDN_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDN1_I]]
//
float16x8_t test_vrndnq_f16(float16x8_t a) {
@@ -559,8 +649,10 @@ float16x8_t test_vrndnq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndp_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <4 x half> @llvm.ceil.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <4 x half> @llvm.ceil.v4f16(<4 x half> [[VRNDP_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDP1_I]]
//
float16x4_t test_vrndp_f16(float16x4_t a) {
@@ -570,8 +662,10 @@ float16x4_t test_vrndp_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndpq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[VRNDP_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDP1_I]]
//
float16x8_t test_vrndpq_f16(float16x8_t a) {
@@ -581,8 +675,10 @@ float16x8_t test_vrndpq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndx_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <4 x half> @llvm.rint.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <4 x half> @llvm.rint.v4f16(<4 x half> [[VRNDX_I]])
// CHECK-NEXT: ret <4 x half> [[VRNDX1_I]]
//
float16x4_t test_vrndx_f16(float16x4_t a) {
@@ -592,8 +688,10 @@ float16x4_t test_vrndx_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrndxq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[VRNDX_I]])
// CHECK-NEXT: ret <8 x half> [[VRNDX1_I]]
//
float16x8_t test_vrndxq_f16(float16x8_t a) {
@@ -603,8 +701,10 @@ float16x8_t test_vrndxq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrsqrte_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrte.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrte.v4f16(<4 x half> [[VRSQRTE_V_I]])
// CHECK-NEXT: ret <4 x half> [[VRSQRTE_V1_I]]
//
float16x4_t test_vrsqrte_f16(float16x4_t a) {
@@ -614,8 +714,10 @@ float16x4_t test_vrsqrte_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vrsqrteq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrte.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrte.v8f16(<8 x half> [[VRSQRTEQ_V_I]])
// CHECK-NEXT: ret <8 x half> [[VRSQRTEQ_V1_I]]
//
float16x8_t test_vrsqrteq_f16(float16x8_t a) {
@@ -625,8 +727,10 @@ float16x8_t test_vrsqrteq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vsqrt_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[TMP2]])
// CHECK-NEXT: ret <4 x half> [[VSQRT_I]]
//
float16x4_t test_vsqrt_f16(float16x4_t a) {
@@ -636,8 +740,10 @@ float16x4_t test_vsqrt_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vsqrtq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VSQRT_I:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[TMP2]])
// CHECK-NEXT: ret <8 x half> [[VSQRT_I]]
//
float16x8_t test_vsqrtq_f16(float16x8_t a) {
@@ -667,9 +773,13 @@ float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vabd_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> [[VABD_I]], <4 x half> [[VABD1_I]])
// CHECK-NEXT: ret <4 x half> [[VABD2_I]]
//
float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
@@ -679,9 +789,13 @@ float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vabdq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VABD2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> [[VABD_I]], <8 x half> [[VABD1_I]])
// CHECK-NEXT: ret <8 x half> [[VABD2_I]]
//
float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
@@ -691,9 +805,13 @@ float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcage_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VCAGE_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCAGE_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> [[VCAGE_V_I]], <4 x half> [[VCAGE_V1_I]])
// CHECK-NEXT: ret <4 x i16> [[VCAGE_V2_I]]
//
uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
@@ -703,9 +821,13 @@ uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcageq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VCAGEQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCAGEQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> [[VCAGEQ_V_I]], <8 x half> [[VCAGEQ_V1_I]])
// CHECK-NEXT: ret <8 x i16> [[VCAGEQ_V2_I]]
//
uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
@@ -715,9 +837,13 @@ uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcagt_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VCAGT_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCAGT_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> [[VCAGT_V_I]], <4 x half> [[VCAGT_V1_I]])
// CHECK-NEXT: ret <4 x i16> [[VCAGT_V2_I]]
//
uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
@@ -727,9 +853,13 @@ uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcagtq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VCAGTQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCAGTQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> [[VCAGTQ_V_I]], <8 x half> [[VCAGTQ_V1_I]])
// CHECK-NEXT: ret <8 x i16> [[VCAGTQ_V2_I]]
//
uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
@@ -739,9 +869,13 @@ uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcale_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VCALE_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> [[B]], <4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VCALE_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facge.v4i16.v4f16(<4 x half> [[VCALE_V_I]], <4 x half> [[VCALE_V1_I]])
// CHECK-NEXT: ret <4 x i16> [[VCALE_V2_I]]
//
uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
@@ -751,9 +885,13 @@ uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcaleq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VCALEQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> [[B]], <8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VCALEQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facge.v8i16.v8f16(<8 x half> [[VCALEQ_V_I]], <8 x half> [[VCALEQ_V1_I]])
// CHECK-NEXT: ret <8 x i16> [[VCALEQ_V2_I]]
//
uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
@@ -763,9 +901,13 @@ uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcalt_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VCALT_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> [[B]], <4 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VCALT_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.facgt.v4i16.v4f16(<4 x half> [[VCALT_V_I]], <4 x half> [[VCALT_V1_I]])
// CHECK-NEXT: ret <4 x i16> [[VCALT_V2_I]]
//
uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
@@ -775,9 +917,13 @@ uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vcaltq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VCALTQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> [[B]], <8 x half> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VCALTQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.facgt.v8i16.v8f16(<8 x half> [[VCALTQ_V_I]], <8 x half> [[VCALTQ_V1_I]])
// CHECK-NEXT: ret <8 x i16> [[VCALTQ_V2_I]]
//
uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
@@ -945,8 +1091,9 @@ float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvt_n_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> [[VCVT_N]], i32 2)
// CHECK-NEXT: ret <4 x i16> [[VCVT_N1]]
//
@@ -957,8 +1104,9 @@ int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_n_s16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VCVT_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> [[VCVT_N]], i32 2)
// CHECK-NEXT: ret <8 x i16> [[VCVT_N1]]
//
@@ -969,8 +1117,9 @@ int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvt_n_u16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> [[VCVT_N]], i32 2)
// CHECK-NEXT: ret <4 x i16> [[VCVT_N1]]
//
@@ -981,8 +1130,9 @@ uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_n_u16_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VCVT_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> [[VCVT_N]], i32 2)
// CHECK-NEXT: ret <8 x i16> [[VCVT_N1]]
//
@@ -1013,9 +1163,13 @@ float16x8_t test_vdivq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmax_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmax.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmax.v4f16(<4 x half> [[VMAX_I]], <4 x half> [[VMAX1_I]])
// CHECK-NEXT: ret <4 x half> [[VMAX2_I]]
//
float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
@@ -1025,9 +1179,13 @@ float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMAX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmax.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMAX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmax.v8f16(<8 x half> [[VMAX_I]], <8 x half> [[VMAX1_I]])
// CHECK-NEXT: ret <8 x half> [[VMAX2_I]]
//
float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
@@ -1037,9 +1195,13 @@ float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnm_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[VMAXNM_I]], <4 x half> [[VMAXNM1_I]])
// CHECK-NEXT: ret <4 x half> [[VMAXNM2_I]]
//
float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
@@ -1049,9 +1211,13 @@ float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnmq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnm.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMAXNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnm.v8f16(<8 x half> [[VMAXNM_I]], <8 x half> [[VMAXNM1_I]])
// CHECK-NEXT: ret <8 x half> [[VMAXNM2_I]]
//
float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
@@ -1061,9 +1227,13 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmin_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmin.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmin.v4f16(<4 x half> [[VMIN_I]], <4 x half> [[VMIN1_I]])
// CHECK-NEXT: ret <4 x half> [[VMIN2_I]]
//
float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
@@ -1073,9 +1243,13 @@ float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vminq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMIN2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmin.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMIN2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmin.v8f16(<8 x half> [[VMIN_I]], <8 x half> [[VMIN1_I]])
// CHECK-NEXT: ret <8 x half> [[VMIN2_I]]
//
float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
@@ -1085,9 +1259,13 @@ float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vminnm_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[VMINNM_I]], <4 x half> [[VMINNM1_I]])
// CHECK-NEXT: ret <4 x half> [[VMINNM2_I]]
//
float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
@@ -1097,9 +1275,13 @@ float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vminnmq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnm.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMINNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnm.v8f16(<8 x half> [[VMINNM_I]], <8 x half> [[VMINNM1_I]])
// CHECK-NEXT: ret <8 x half> [[VMINNM2_I]]
//
float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
@@ -1129,9 +1311,13 @@ float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulx_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[VMULX_I]], <4 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x half> [[VMULX2_I]]
//
float16x4_t test_vmulx_f16(float16x4_t a, float16x4_t b) {
@@ -1141,9 +1327,13 @@ float16x4_t test_vmulx_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulxq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[VMULX_I]], <8 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <8 x half> [[VMULX2_I]]
//
float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
@@ -1153,11 +1343,17 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpadd_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> [[VPADD_V_I]], <4 x half> [[VPADD_V1_I]])
// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <4 x half> [[VPADD_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <4 x half> [[VPADD_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
return vpadd_f16(a, b);
@@ -1166,11 +1362,17 @@ float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpaddq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VPADDQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> [[VPADDQ_V_I]], <8 x half> [[VPADDQ_V1_I]])
// CHECK-NEXT: [[VPADDQ_V3_I:%.*]] = bitcast <8 x half> [[VPADDQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <8 x half> [[VPADDQ_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP5]]
//
float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
return vpaddq_f16(a, b);
@@ -1179,9 +1381,13 @@ float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpmax_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxp.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxp.v4f16(<4 x half> [[VPMAX_I]], <4 x half> [[VPMAX1_I]])
// CHECK-NEXT: ret <4 x half> [[VPMAX2_I]]
//
float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
@@ -1191,9 +1397,13 @@ float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpmaxq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxp.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VPMAX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxp.v8f16(<8 x half> [[VPMAX_I]], <8 x half> [[VPMAX1_I]])
// CHECK-NEXT: ret <8 x half> [[VPMAX2_I]]
//
float16x8_t test_vpmaxq_f16(float16x8_t a, float16x8_t b) {
@@ -1203,9 +1413,13 @@ float16x8_t test_vpmaxq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpmaxnm_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> [[VPMAXNM_I]], <4 x half> [[VPMAXNM1_I]])
// CHECK-NEXT: ret <4 x half> [[VPMAXNM2_I]]
//
float16x4_t test_vpmaxnm_f16(float16x4_t a, float16x4_t b) {
@@ -1215,9 +1429,13 @@ float16x4_t test_vpmaxnm_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpmaxnmq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VPMAXNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> [[VPMAXNM_I]], <8 x half> [[VPMAXNM1_I]])
// CHECK-NEXT: ret <8 x half> [[VPMAXNM2_I]]
//
float16x8_t test_vpmaxnmq_f16(float16x8_t a, float16x8_t b) {
@@ -1227,9 +1445,13 @@ float16x8_t test_vpmaxnmq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpmin_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminp.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminp.v4f16(<4 x half> [[VPMIN_I]], <4 x half> [[VPMIN1_I]])
// CHECK-NEXT: ret <4 x half> [[VPMIN2_I]]
//
float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
@@ -1239,9 +1461,13 @@ float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpminq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminp.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VPMIN2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminp.v8f16(<8 x half> [[VPMIN_I]], <8 x half> [[VPMIN1_I]])
// CHECK-NEXT: ret <8 x half> [[VPMIN2_I]]
//
float16x8_t test_vpminq_f16(float16x8_t a, float16x8_t b) {
@@ -1251,9 +1477,13 @@ float16x8_t test_vpminq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpminnm_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> [[VPMINNM_I]], <4 x half> [[VPMINNM1_I]])
// CHECK-NEXT: ret <4 x half> [[VPMINNM2_I]]
//
float16x4_t test_vpminnm_f16(float16x4_t a, float16x4_t b) {
@@ -1263,9 +1493,13 @@ float16x4_t test_vpminnm_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vpminnmq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VPMINNM2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> [[VPMINNM_I]], <8 x half> [[VPMINNM1_I]])
// CHECK-NEXT: ret <8 x half> [[VPMINNM2_I]]
//
float16x8_t test_vpminnmq_f16(float16x8_t a, float16x8_t b) {
@@ -1275,11 +1509,17 @@ float16x8_t test_vpminnmq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vrecps_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VRECPS_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frecps.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VRECPS_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frecps.v4f16(<4 x half> [[VRECPS_V_I]], <4 x half> [[VRECPS_V1_I]])
// CHECK-NEXT: [[VRECPS_V3_I:%.*]] = bitcast <4 x half> [[VRECPS_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <4 x half> [[VRECPS_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
return vrecps_f16(a, b);
@@ -1288,11 +1528,17 @@ float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vrecpsq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VRECPSQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frecps.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VRECPSQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frecps.v8f16(<8 x half> [[VRECPSQ_V_I]], <8 x half> [[VRECPSQ_V1_I]])
// CHECK-NEXT: [[VRECPSQ_V3_I:%.*]] = bitcast <8 x half> [[VRECPSQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <8 x half> [[VRECPSQ_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP5]]
//
float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
return vrecpsq_f16(a, b);
@@ -1301,11 +1547,17 @@ float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vrsqrts_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VRSQRTS_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrts.v4f16(<4 x half> [[A]], <4 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VRSQRTS_V2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.frsqrts.v4f16(<4 x half> [[VRSQRTS_V_I]], <4 x half> [[VRSQRTS_V1_I]])
// CHECK-NEXT: [[VRSQRTS_V3_I:%.*]] = bitcast <4 x half> [[VRSQRTS_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <4 x half> [[VRSQRTS_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
return vrsqrts_f16(a, b);
@@ -1314,11 +1566,17 @@ float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vrsqrtsq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VRSQRTSQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrts.v8f16(<8 x half> [[A]], <8 x half> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VRSQRTSQ_V2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.frsqrts.v8f16(<8 x half> [[VRSQRTSQ_V_I]], <8 x half> [[VRSQRTSQ_V1_I]])
// CHECK-NEXT: [[VRSQRTSQ_V3_I:%.*]] = bitcast <8 x half> [[VRSQRTSQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <8 x half> [[VRSQRTSQ_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP5]]
//
float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
return vrsqrtsq_f16(a, b);
@@ -1347,11 +1605,17 @@ float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vfma_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[B]], <4 x half> [[C]], <4 x half> [[A]])
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfma_f16(a, b, c);
@@ -1360,11 +1624,17 @@ float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfmaq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[C]], <8 x half> [[A]])
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmaq_f16(a, b, c);
@@ -1374,11 +1644,17 @@ float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG_I]], <4 x half> [[C]], <4 x half> [[A]])
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
return vfms_f16(a, b, c);
@@ -1388,11 +1664,17 @@ float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG_I]], <8 x half> [[C]], <8 x half> [[A]])
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmsq_f16(a, b, c);
@@ -1401,13 +1683,16 @@ float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfma_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]])
// CHECK-NEXT: ret <4 x half> [[FMLA2]]
//
@@ -1418,13 +1703,16 @@ float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfmaq_lane_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
// CHECK-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]])
// CHECK-NEXT: ret <8 x half> [[FMLA2]]
//
@@ -1435,15 +1723,18 @@ float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfma_laneq_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
-// CHECK-NEXT: ret <4 x half> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
return vfma_laneq_f16(a, b, c, 7);
@@ -1452,15 +1743,18 @@ float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfmaq_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP6:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
-// CHECK-NEXT: ret <8 x half> [[TMP6]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmaq_laneq_f16(a, b, c, 7);
@@ -1473,11 +1767,17 @@ float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[B]], <4 x half> [[VECINIT3]], <4 x half> [[A]])
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
return vfma_n_f16(a, b, c);
@@ -1494,11 +1794,17 @@ float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[VECINIT7]], <8 x half> [[A]])
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
return vfmaq_n_f16(a, b, c);
@@ -1529,14 +1835,17 @@ float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfms_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
// CHECK-NEXT: [[FMLA2:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FMLA]], <4 x half> [[LANE]], <4 x half> [[FMLA1]])
// CHECK-NEXT: ret <4 x half> [[FMLA2]]
//
@@ -1547,14 +1856,17 @@ float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfmsq_lane_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP6]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
// CHECK-NEXT: [[FMLA2:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FMLA]], <8 x half> [[LANE]], <8 x half> [[FMLA1]])
// CHECK-NEXT: ret <8 x half> [[FMLA2]]
//
@@ -1565,16 +1877,19 @@ float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfms_laneq_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
-// CHECK-NEXT: ret <4 x half> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP7]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
return vfms_laneq_f16(a, b, c, 7);
@@ -1583,16 +1898,19 @@ float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vfmsq_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP6:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
-// CHECK-NEXT: ret <8 x half> [[TMP6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP8]], <8 x half> [[TMP8]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP7]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
return vfmsq_laneq_f16(a, b, c, 7);
@@ -1606,11 +1924,17 @@ float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1
// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG]], <4 x half> [[VECINIT3]], <4 x half> [[A]])
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], <4 x half> [[TMP6]])
+// CHECK-NEXT: ret <4 x half> [[TMP9]]
//
float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
return vfms_n_f16(a, b, c);
@@ -1628,11 +1952,17 @@ float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5
// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6
// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG]], <8 x half> [[VECINIT7]], <8 x half> [[A]])
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], <8 x half> [[TMP6]])
+// CHECK-NEXT: ret <8 x half> [[TMP9]]
//
float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
return vfmsq_n_f16(a, b, c);
@@ -1669,9 +1999,10 @@ float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
// CHECK-LABEL: define {{[^@]+}}@test_vmul_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x half> [[A]], [[LANE]]
// CHECK-NEXT: ret <4 x half> [[MUL]]
//
@@ -1682,9 +2013,10 @@ float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulq_lane_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x half> [[A]], [[LANE]]
// CHECK-NEXT: ret <8 x half> [[MUL]]
//
@@ -1695,9 +2027,10 @@ float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmul_laneq_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x half> [[A]], [[LANE]]
// CHECK-NEXT: ret <4 x half> [[MUL]]
//
@@ -1708,9 +2041,10 @@ float16x4_t test_vmul_laneq_f16(float16x4_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulq_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x half> [[A]], [[LANE]]
// CHECK-NEXT: ret <8 x half> [[MUL]]
//
@@ -1754,16 +2088,12 @@ float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_lane_f16
// CHECK-SAME: (half noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
-// CHECK-NEXT: store <4 x half> [[B]], ptr [[__REINT_847]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2
-// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP1]] to float
-// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK-NEXT: [[CONV3:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV3]]
// CHECK-NEXT: [[TMP2:%.*]] = fptrunc float [[MUL]] to half
// CHECK-NEXT: ret half [[TMP2]]
//
@@ -1774,16 +2104,12 @@ float16_t test_vmulh_lane_f16(float16_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_laneq_f16
// CHECK-SAME: (half noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
-// CHECK-NEXT: store <8 x half> [[B]], ptr [[__REINT_850]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2
-// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP1]] to float
-// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGETQ_LANE]] to half
+// CHECK-NEXT: [[CONV3:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV3]]
// CHECK-NEXT: [[TMP2:%.*]] = fptrunc float [[MUL]] to half
// CHECK-NEXT: ret half [[TMP2]]
//
@@ -1794,12 +2120,17 @@ float16_t test_vmulh_laneq_f16(float16_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulx_lane_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LANE]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[VMULX_I]], <4 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x half> [[VMULX2_I]]
//
float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
@@ -1809,12 +2140,17 @@ float16x4_t test_vmulx_lane_f16(float16x4_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulxq_lane_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[LANE]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[VMULX_I]], <8 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <8 x half> [[VMULX2_I]]
//
float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
@@ -1824,12 +2160,17 @@ float16x8_t test_vmulxq_lane_f16(float16x8_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulx_laneq_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LANE]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LANE]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[VMULX_I]], <4 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x half> [[VMULX2_I]]
//
float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
@@ -1839,12 +2180,17 @@ float16x4_t test_vmulx_laneq_f16(float16x4_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulxq_laneq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[LANE]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[LANE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[LANE]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[VMULX_I]], <8 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <8 x half> [[VMULX2_I]]
//
float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
@@ -1858,9 +2204,13 @@ float16x8_t test_vmulxq_laneq_f16(float16x8_t a, float16x8_t b) {
// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[B]], i32 1
// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[B]], i32 2
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[B]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[VECINIT3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[VMULX_I]], <4 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <4 x half> [[VMULX2_I]]
//
float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
@@ -1878,9 +2228,13 @@ float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) {
// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[B]], i32 5
// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[B]], i32 6
// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[B]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8>
-// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[VECINIT7]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[VMULX_I]], <8 x half> [[VMULX1_I]])
// CHECK-NEXT: ret <8 x half> [[VMULX2_I]]
//
float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
@@ -1912,8 +2266,9 @@ float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxv_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[VMAXV]])
// CHECK-NEXT: ret half [[VMAXV1]]
//
@@ -1924,8 +2279,9 @@ float16_t test_vmaxv_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[VMAXV]])
// CHECK-NEXT: ret half [[VMAXV1]]
//
@@ -1936,8 +2292,9 @@ float16_t test_vmaxvq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vminv_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VMINV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VMINV:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[VMINV]])
// CHECK-NEXT: ret half [[VMINV1]]
//
@@ -1948,8 +2305,9 @@ float16_t test_vminv_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vminvq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VMINV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VMINV:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[VMINV]])
// CHECK-NEXT: ret half [[VMINV1]]
//
@@ -1960,8 +2318,9 @@ float16_t test_vminvq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnmv_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[VMAXNMV]])
// CHECK-NEXT: ret half [[VMAXNMV1]]
//
@@ -1972,8 +2331,9 @@ float16_t test_vmaxnmv_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnmvq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[VMAXNMV]])
// CHECK-NEXT: ret half [[VMAXNMV1]]
//
@@ -1984,8 +2344,9 @@ float16_t test_vmaxnmvq_f16(float16x8_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vminnmv_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK-NEXT: [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[VMINNMV]])
// CHECK-NEXT: ret half [[VMINNMV1]]
//
@@ -1996,8 +2357,9 @@ float16_t test_vminnmv_f16(float16x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vminnmvq_f16
// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK-NEXT: [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[VMINNMV]])
// CHECK-NEXT: ret half [[VMINNMV1]]
//
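
For readers skimming the regenerated checks above: the common pattern is that each float16 operand is now bitcast to its same-width integer vector (<4 x i16> or <8 x i16>) before the final cast to the byte vector, replacing the old direct half-to-i8 casts and the stack store/load round-trips. A minimal sketch, not part of this patch, of the source-level pattern the f16 lane tests exercise (the compile flags and function name here are illustrative assumptions; exact SSA value names vary between clang versions):

#include <arm_neon.h>

// Compile with, e.g.:
//   clang --target=aarch64-none-linux-gnu -march=armv8.2-a+fp16 \
//         -O0 -S -emit-llvm fma_lane.c
// and the emitted IR should show the <4 x half> -> <4 x i16> -> <8 x i8>
// bitcast chain asserted by the checks above.
float16x4_t fma_lane_example(float16x4_t acc, float16x4_t x, float16x4_t y) {
  // Splat lane 3 of y, then fused multiply-add into acc via @llvm.fma.v4f16.
  return vfma_lane_f16(acc, x, y, 3);
}
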
diff --git a/clang/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c b/clang/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
index c44dd333c9754..0138fad1a7792 100644
--- a/clang/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
+++ b/clang/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
@@ -1,120 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v8.5a\
// RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg \
+// RUN: | opt -S -passes=mem2reg,sroa \
// RUN: | FileCheck %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: test_vrnd32x_f32
-// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[RND]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrnd32x_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRND32X_F32_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRND32X_F321_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> [[VRND32X_F32_I]])
+// CHECK-NEXT: [[VRND32X_F322_I:%.*]] = bitcast <2 x float> [[VRND32X_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND32X_F322_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP3]]
+//
float32x2_t test_vrnd32x_f32(float32x2_t a) {
return vrnd32x_f32(a);
}
-// CHECK-LABEL: test_vrnd32xq_f32
-// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[RND]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrnd32xq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND32XQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRND32XQ_F321_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> [[VRND32XQ_F32_I]])
+// CHECK-NEXT: [[VRND32XQ_F322_I:%.*]] = bitcast <4 x float> [[VRND32XQ_F321_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND32XQ_F322_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vrnd32xq_f32(float32x4_t a) {
return vrnd32xq_f32(a);
}
-// CHECK-LABEL: test_vrnd32z_f32
-// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[RND]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrnd32z_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRND32Z_F32_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRND32Z_F321_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> [[VRND32Z_F32_I]])
+// CHECK-NEXT: [[VRND32Z_F322_I:%.*]] = bitcast <2 x float> [[VRND32Z_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND32Z_F322_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP3]]
+//
float32x2_t test_vrnd32z_f32(float32x2_t a) {
return vrnd32z_f32(a);
}
-// CHECK-LABEL: test_vrnd32zq_f32
-// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[RND]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrnd32zq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND32ZQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRND32ZQ_F321_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> [[VRND32ZQ_F32_I]])
+// CHECK-NEXT: [[VRND32ZQ_F322_I:%.*]] = bitcast <4 x float> [[VRND32ZQ_F321_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND32ZQ_F322_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vrnd32zq_f32(float32x4_t a) {
return vrnd32zq_f32(a);
}
-// CHECK-LABEL: test_vrnd64x_f32
-// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[RND]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrnd64x_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRND64X_F32_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRND64X_F321_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> [[VRND64X_F32_I]])
+// CHECK-NEXT: [[VRND64X_F322_I:%.*]] = bitcast <2 x float> [[VRND64X_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND64X_F322_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP3]]
+//
float32x2_t test_vrnd64x_f32(float32x2_t a) {
return vrnd64x_f32(a);
}
-// CHECK-LABEL: test_vrnd64xq_f32
-// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[RND]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrnd64xq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND64XQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRND64XQ_F321_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> [[VRND64XQ_F32_I]])
+// CHECK-NEXT: [[VRND64XQ_F322_I:%.*]] = bitcast <4 x float> [[VRND64XQ_F321_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND64XQ_F322_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vrnd64xq_f32(float32x4_t a) {
return vrnd64xq_f32(a);
}
-// CHECK-LABEL: test_vrnd64z_f32
-// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[RND]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrnd64z_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRND64Z_F32_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRND64Z_F321_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> [[VRND64Z_F32_I]])
+// CHECK-NEXT: [[VRND64Z_F322_I:%.*]] = bitcast <2 x float> [[VRND64Z_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND64Z_F322_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP3]]
+//
float32x2_t test_vrnd64z_f32(float32x2_t a) {
return vrnd64z_f32(a);
}
-// CHECK-LABEL: test_vrnd64zq_f32
-// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[RND]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrnd64zq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND64ZQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRND64ZQ_F321_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> [[VRND64ZQ_F32_I]])
+// CHECK-NEXT: [[VRND64ZQ_F322_I:%.*]] = bitcast <4 x float> [[VRND64ZQ_F321_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND64ZQ_F322_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vrnd64zq_f32(float32x4_t a) {
return vrnd64zq_f32(a);
}
-// CHECK-LABEL: test_vrnd32x_f64
-// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[RND]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnd32x_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRND32X_F64_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRND32X_F641_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> [[VRND32X_F64_I]])
+// CHECK-NEXT: [[VRND32X_F642_I:%.*]] = bitcast <1 x double> [[VRND32X_F641_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND32X_F642_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP3]]
+//
float64x1_t test_vrnd32x_f64(float64x1_t a) {
return vrnd32x_f64(a);
}
-// CHECK-LABEL: test_vrnd32xq_f64
-// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[RND]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrnd32xq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND32XQ_F64_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRND32XQ_F641_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> [[VRND32XQ_F64_I]])
+// CHECK-NEXT: [[VRND32XQ_F642_I:%.*]] = bitcast <2 x double> [[VRND32XQ_F641_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND32XQ_F642_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t test_vrnd32xq_f64(float64x2_t a) {
return vrnd32xq_f64(a);
}
-// CHECK-LABEL: test_vrnd32z_f64
-// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[RND]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnd32z_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRND32Z_F64_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRND32Z_F641_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> [[VRND32Z_F64_I]])
+// CHECK-NEXT: [[VRND32Z_F642_I:%.*]] = bitcast <1 x double> [[VRND32Z_F641_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND32Z_F642_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP3]]
+//
float64x1_t test_vrnd32z_f64(float64x1_t a) {
return vrnd32z_f64(a);
}
-// CHECK-LABEL: test_vrnd32zq_f64
-// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[RND]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrnd32zq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND32ZQ_F64_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRND32ZQ_F641_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> [[VRND32ZQ_F64_I]])
+// CHECK-NEXT: [[VRND32ZQ_F642_I:%.*]] = bitcast <2 x double> [[VRND32ZQ_F641_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND32ZQ_F642_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t test_vrnd32zq_f64(float64x2_t a) {
return vrnd32zq_f64(a);
}
-// CHECK-LABEL: test_vrnd64x_f64
-// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[RND]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnd64x_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRND64X_F64_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRND64X_F641_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> [[VRND64X_F64_I]])
+// CHECK-NEXT: [[VRND64X_F642_I:%.*]] = bitcast <1 x double> [[VRND64X_F641_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND64X_F642_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP3]]
+//
float64x1_t test_vrnd64x_f64(float64x1_t a) {
return vrnd64x_f64(a);
}
-// CHECK-LABEL: test_vrnd64xq_f64
-// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[RND]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrnd64xq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND64XQ_F64_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRND64XQ_F641_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> [[VRND64XQ_F64_I]])
+// CHECK-NEXT: [[VRND64XQ_F642_I:%.*]] = bitcast <2 x double> [[VRND64XQ_F641_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND64XQ_F642_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t test_vrnd64xq_f64(float64x2_t a) {
return vrnd64xq_f64(a);
}
-// CHECK-LABEL: test_vrnd64z_f64
-// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
-// CHECK: ret <1 x double> [[RND]]
+// CHECK-LABEL: define dso_local <1 x double> @test_vrnd64z_f64(
+// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[VRND64Z_F64_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[VRND64Z_F641_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> [[VRND64Z_F64_I]])
+// CHECK-NEXT: [[VRND64Z_F642_I:%.*]] = bitcast <1 x double> [[VRND64Z_F641_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND64Z_F642_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]] to <1 x double>
+// CHECK-NEXT: ret <1 x double> [[TMP3]]
+//
float64x1_t test_vrnd64z_f64(float64x1_t a) {
return vrnd64z_f64(a);
}
-// CHECK-LABEL: test_vrnd64zq_f64
-// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
-// CHECK: ret <2 x double> [[RND]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vrnd64zq_f64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRND64ZQ_F64_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRND64ZQ_F641_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> [[VRND64ZQ_F64_I]])
+// CHECK-NEXT: [[VRND64ZQ_F642_I:%.*]] = bitcast <2 x double> [[VRND64ZQ_F641_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRND64ZQ_F642_I]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t test_vrnd64zq_f64(float64x2_t a) {
return vrnd64zq_f64(a);
}
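
For the <1 x double> variants above, note that after sroa the operand no longer passes through an alloca: it is bitcast to a scalar i64, reinserted into a <1 x i64>, and only then cast toward <8 x i8>, which is why the checks match insertelement lines rather than store/load pairs. A minimal sketch, not part of this patch, of the frint path these tests cover (the compile flags are assumptions; the intrinsic requires an Armv8.5-A target, where FRINT32X is mandatory):

#include <arm_neon.h>

// Compile with, e.g.:
//   clang --target=aarch64-none-linux-gnu -march=armv8.5-a -O0 -S -emit-llvm
// The IR for the <1 x double> case should round-trip through i64 /
// <1 x i64> before calling @llvm.aarch64.neon.frint32x.v1f64.
float64x1_t rnd32x_example(float64x1_t v) {
  // Round v to the nearest value representable as a signed 32-bit
  // integer, using the current FPCR rounding mode (FRINT32X).
  return vrnd32x_f64(v);
}
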
diff --git a/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c
index 7bfeb7939edb2..6fffcb6c6b391 100644
--- a/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.6a -target-feature +i8mm \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg,sroa \
@@ -7,141 +8,198 @@
#include <arm_neon.h>
-// CHECK-LABEL: test_vmmlaq_s32
-// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <4 x i32> [[VAL]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmmlaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMMLA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> [[VMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]]
+//
int32x4_t test_vmmlaq_s32(int32x4_t r, int8x16_t a, int8x16_t b) {
return vmmlaq_s32(r, a, b);
}
-// CHECK-LABEL: test_vmmlaq_u32
-// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <4 x i32> [[VAL]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vmmlaq_u32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMMLA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> [[VMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]]
+//
uint32x4_t test_vmmlaq_u32(uint32x4_t r, uint8x16_t a, uint8x16_t b) {
return vmmlaq_u32(r, a, b);
}
-// CHECK-LABEL: test_vusmmlaq_s32
-// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <4 x i32> [[VAL]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vusmmlaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSMMLA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VUSMMLA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> [[VUSMMLA_I]], <16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[VUSMMLA1_I]]
+//
int32x4_t test_vusmmlaq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
return vusmmlaq_s32(r, a, b);
}
-// CHECK-LABEL: test_vusdot_s32
-// CHECK: [[VAL:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <2 x i32> [[VAL]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vusdot_s32(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]]
+//
int32x2_t test_vusdot_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
return vusdot_s32(r, a, b);
}
-// CHECK-LABEL: test_vusdot_lane_s32
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
-// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> [[TMP4]])
-// CHECK: ret <2 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vusdot_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[A]], <8 x i8> [[TMP3]])
+// CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]]
+//
int32x2_t test_vusdot_lane_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
return vusdot_lane_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vsudot_lane_s32
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> %1 to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
-// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> [[TMP4]], <8 x i8> %a)
-// CHECK: ret <2 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsudot_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[TMP3]], <8 x i8> [[A]])
+// CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]]
+//
int32x2_t test_vsudot_lane_s32(int32x2_t r, int8x8_t a, uint8x8_t b) {
return vsudot_lane_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vusdot_laneq_s32
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
-// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> [[TMP4]])
-// CHECK: ret <2 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vusdot_laneq_s32(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[A]], <8 x i8> [[TMP3]])
+// CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]]
+//
int32x2_t test_vusdot_laneq_s32(int32x2_t r, uint8x8_t a, int8x16_t b) {
return vusdot_laneq_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vsudot_laneq_s32
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
-// CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> [[TMP4]], <8 x i8> %a)
-// CHECK: ret <2 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <2 x i32> @test_vsudot_laneq_s32(
+// CHECK-SAME: <2 x i32> noundef [[R:%.*]], <8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[R]] to <8 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> [[VUSDOT_I]], <8 x i8> [[TMP3]], <8 x i8> [[A]])
+// CHECK-NEXT: ret <2 x i32> [[VUSDOT1_I]]
+//
int32x2_t test_vsudot_laneq_s32(int32x2_t r, int8x8_t a, uint8x16_t b) {
return vsudot_laneq_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vusdotq_s32
-// CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <4 x i32> [[VAL]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vusdotq_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]]
+//
int32x4_t test_vusdotq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
return vusdotq_s32(r, a, b);
}
-// CHECK-LABEL: test_vusdotq_lane_s32
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
-// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> [[TMP4]])
-// CHECK: ret <4 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vusdotq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[A]], <16 x i8> [[TMP3]])
+// CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]]
+//
int32x4_t test_vusdotq_lane_s32(int32x4_t r, uint8x16_t a, int8x8_t b) {
return vusdotq_lane_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vsudotq_lane_s32
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
-// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> [[TMP4]], <16 x i8> %a)
-// CHECK: ret <4 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsudotq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[TMP3]], <16 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]]
+//
int32x4_t test_vsudotq_lane_s32(int32x4_t r, int8x16_t a, uint8x8_t b) {
return vsudotq_lane_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vusdotq_laneq_s32
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
-// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> [[TMP4]])
-// CHECK: ret <4 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vusdotq_laneq_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[A]], <16 x i8> [[TMP3]])
+// CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]]
+//
int32x4_t test_vusdotq_laneq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
return vusdotq_laneq_s32(r, a, b, 0);
}
-// CHECK-LABEL: test_vsudotq_laneq_s32
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
-// CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> [[TMP4]], <16 x i8> %a)
-// CHECK: ret <4 x i32> [[OP]]
+// CHECK-LABEL: define dso_local <4 x i32> @test_vsudotq_laneq_s32(
+// CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[R]] to <16 x i8>
+// CHECK-NEXT: [[VUSDOT_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[VUSDOT1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> [[VUSDOT_I]], <16 x i8> [[TMP3]], <16 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i32> [[VUSDOT1_I]]
+//
int32x4_t test_vsudotq_laneq_s32(int32x4_t r, int8x16_t a, uint8x16_t b) {
return vsudotq_laneq_s32(r, a, b, 0);
}
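
The lane-variant tests above all check one shape: the <8 x i8> or <16 x i8> operand is reinterpreted as 32-bit lanes, the selected lane is splatted with a shufflevector, and the splat feeds the same intrinsic as the non-lane form. As a minimal C-level sketch of what the checks for vusdot_lane_s32(r, a, b, 0) verify (illustrative names, assumes a target with +i8mm; this is not the header's literal body):

    #include <arm_neon.h>

    int32x2_t usdot_lane_sketch(int32x2_t r, uint8x8_t a, int8x8_t b) {
      int32x2_t b32  = vreinterpret_s32_s8(b);  // view b as two 32-bit lanes
      int32x2_t lane = vdup_lane_s32(b32, 0);   // splat the selected lane
      return vusdot_s32(r, a, vreinterpret_s8_s32(lane)); // reuse the non-lane form
    }

The vsudot variants follow identically, with the splatted operand and the plain operand swapped in the intrinsic call.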
diff --git a/clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c b/clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c
index c4f0b78fc6a57..874b1c4f36867 100644
--- a/clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c
+++ b/clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c
@@ -2,11 +2,11 @@
// RUN: %clang_cc1 -triple armv8-arm-none-eabi \
// RUN: -target-feature +neon -target-feature +bf16 -mfloat-abi soft \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// RUN: %clang_cc1 -triple armv8-arm-none-eabi \
// RUN: -target-feature +neon -target-feature +bf16 -mfloat-abi hard \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -14,10 +14,16 @@
// CHECK-LABEL: @test_vbfdot_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b) {
@@ -26,10 +32,16 @@ float32x2_t test_vbfdot_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b) {
// CHECK-LABEL: @test_vbfdotq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b){
@@ -38,19 +50,24 @@ float32x4_t test_vbfdotq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b){
// CHECK-LABEL: @test_vbfdot_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_128:%.*]] = alloca <4 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_128:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_128]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_128]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
-// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_128]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_128]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
@@ -59,19 +76,24 @@ float32x2_t test_vbfdot_lane_f32(float32x2_t r, bfloat16x4_t a, bfloat16x4_t b){
// CHECK-LABEL: @test_vbfdotq_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_130:%.*]] = alloca <8 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_130:%.*]] = alloca <4 x float>, align 8
-// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_130]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_130]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_130]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_130]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP4]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x bfloat> [[TMP6]] to <8 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
@@ -80,19 +102,24 @@ float32x4_t test_vbfdotq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b
// CHECK-LABEL: @test_vbfdot_laneq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_132:%.*]] = alloca <8 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_132:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: store <8 x bfloat> [[B:%.*]], ptr [[__REINT_132]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[__REINT_132]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <2 x i32> <i32 3, i32 3>
-// CHECK-NEXT: store <2 x float> [[LANE]], ptr [[__REINT1_132]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[__REINT1_132]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[R:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x bfloat> [[TMP3]] to <8 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[R]], <4 x bfloat> [[A]], <4 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP4]], <2 x i32> <i32 3, i32 3>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[LANE]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <4 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[R:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x bfloat> [[A:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x bfloat> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> [[VBFDOT_I]], <4 x bfloat> [[VBFDOT1_I]], <4 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <2 x float> [[VBFDOT3_I]]
//
float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b) {
@@ -101,19 +128,24 @@ float32x2_t test_vbfdot_laneq_f32(float32x2_t r, bfloat16x4_t a, bfloat16x8_t b)
// CHECK-LABEL: @test_vbfdotq_lane_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_126:%.*]] = alloca <4 x bfloat>, align 8
-// CHECK-NEXT: [[__REINT1_126:%.*]] = alloca <4 x float>, align 8
-// CHECK-NEXT: store <4 x bfloat> [[B:%.*]], ptr [[__REINT_126]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[__REINT_126]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> zeroinitializer
-// CHECK-NEXT: store <4 x float> [[LANE]], ptr [[__REINT1_126]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[__REINT1_126]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x bfloat> [[TMP3]] to <16 x i8>
-// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[TMP3]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[B:%.*]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[LANE]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x bfloat> [[TMP6]] to <8 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
+// CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// CHECK-NEXT: [[VBFDOT_I:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK-NEXT: [[VBFDOT1_I:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT2_I:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFDOT3_I:%.*]] = call <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> [[VBFDOT_I]], <8 x bfloat> [[VBFDOT1_I]], <8 x bfloat> [[VBFDOT2_I]])
// CHECK-NEXT: ret <4 x float> [[VBFDOT3_I]]
//
float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
@@ -122,12 +154,20 @@ float32x4_t test_vbfdotq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
// CHECK-LABEL: @test_vbfmmlaq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMMLAQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmmla(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMMLAQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMMLAQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMMLAQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMMLAQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmmla(<4 x float> [[VBFMMLAQ_F32_I]], <8 x bfloat> [[VBFMMLAQ_F321_I]], <8 x bfloat> [[VBFMMLAQ_F322_I]])
// CHECK-NEXT: [[VBFMMLAQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMMLAQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMMLAQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMMLAQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmmlaq_f32(r, a, b);
@@ -135,12 +175,20 @@ float32x4_t test_vbfmmlaq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-LABEL: @test_vbfmlalbq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlalbq_f32(r, a, b);
@@ -148,12 +196,20 @@ float32x4_t test_vbfmlalbq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-LABEL: @test_vbfmlaltq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[B:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlaltq_f32(r, a, b);
@@ -163,26 +219,34 @@ float32x4_t test_vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
return vbfmlalbq_lane_f32(r, a, b, 0);
@@ -192,26 +256,34 @@ float32x4_t test_vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALBQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALBQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALBQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalb(<4 x float> [[VBFMLALBQ_F32_I]], <8 x bfloat> [[VBFMLALBQ_F321_I]], <8 x bfloat> [[VBFMLALBQ_F322_I]])
// CHECK-NEXT: [[VBFMLALBQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALBQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALBQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALBQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlalbq_laneq_f32(r, a, b, 3);
@@ -221,26 +293,34 @@ float32x4_t test_vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <4 x bfloat> [[B]], i32 0
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) {
return vbfmlaltq_lane_f32(r, a, b, 0);
@@ -250,26 +330,34 @@ float32x4_t test_vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 3
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x bfloat> poison, bfloat [[VGET_LANE]], i32 0
-// CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE3]], i32 1
-// CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <8 x bfloat> [[VECINIT5]], bfloat [[VGET_LANE8]], i32 2
-// CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <8 x bfloat> [[VECINIT10]], bfloat [[VGET_LANE13]], i32 3
-// CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT20:%.*]] = insertelement <8 x bfloat> [[VECINIT15]], bfloat [[VGET_LANE18]], i32 4
-// CHECK-NEXT: [[VGET_LANE23:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT25:%.*]] = insertelement <8 x bfloat> [[VECINIT20]], bfloat [[VGET_LANE23]], i32 5
+// CHECK-NEXT: [[VGET_LANE4:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x bfloat> [[VECINIT]], bfloat [[VGET_LANE4]], i32 1
+// CHECK-NEXT: [[VGET_LANE10:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT12:%.*]] = insertelement <8 x bfloat> [[VECINIT6]], bfloat [[VGET_LANE10]], i32 2
+// CHECK-NEXT: [[VGET_LANE16:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT18:%.*]] = insertelement <8 x bfloat> [[VECINIT12]], bfloat [[VGET_LANE16]], i32 3
+// CHECK-NEXT: [[VGET_LANE22:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT24:%.*]] = insertelement <8 x bfloat> [[VECINIT18]], bfloat [[VGET_LANE22]], i32 4
// CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT25]], bfloat [[VGET_LANE28]], i32 6
-// CHECK-NEXT: [[VGET_LANE33:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
-// CHECK-NEXT: [[VECINIT35:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE33]], i32 7
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT35]] to <16 x i8>
-// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[R]], <8 x bfloat> [[A]], <8 x bfloat> [[VECINIT35]])
+// CHECK-NEXT: [[VECINIT30:%.*]] = insertelement <8 x bfloat> [[VECINIT24]], bfloat [[VGET_LANE28]], i32 5
+// CHECK-NEXT: [[VGET_LANE34:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT36:%.*]] = insertelement <8 x bfloat> [[VECINIT30]], bfloat [[VGET_LANE34]], i32 6
+// CHECK-NEXT: [[VGET_LANE40:%.*]] = extractelement <8 x bfloat> [[B]], i32 3
+// CHECK-NEXT: [[VECINIT42:%.*]] = insertelement <8 x bfloat> [[VECINIT36]], bfloat [[VGET_LANE40]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[R:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x bfloat> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x bfloat> [[VECINIT42]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[VBFMLALTQ_F32_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VBFMLALTQ_F321_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F322_I:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x bfloat>
+// CHECK-NEXT: [[VBFMLALTQ_F323_I:%.*]] = call <4 x float> @llvm.arm.neon.bfmlalt(<4 x float> [[VBFMLALTQ_F32_I]], <8 x bfloat> [[VBFMLALTQ_F321_I]], <8 x bfloat> [[VBFMLALTQ_F322_I]])
// CHECK-NEXT: [[VBFMLALTQ_F324_I:%.*]] = bitcast <4 x float> [[VBFMLALTQ_F323_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VBFMLALTQ_F323_I]]
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[VBFMLALTQ_F324_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP7]]
//
float32x4_t test_vbfmlaltq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) {
return vbfmlaltq_laneq_f32(r, a, b, 3);
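
The __REINT alloca/store/load sequences removed throughout this file came from reinterpret helpers that round-tripped vectors through a stack slot; a direct bit-level cast needs no memory traffic, and the RUN lines now schedule sroa after mem2reg, presumably so the aggregate temporaries that -O0 clang still emits are split away and the regenerated checks stay alloca-free. A sketch of the two styles (an assumed illustration, not the literal arm_neon.h code):

    #include <arm_neon.h>

    // Old style: reinterpret via a stack round-trip; at -O0 this lowers to
    // the alloca/store/load sequences the old CHECK lines matched.
    static inline float32x2_t reinterpret_old(bfloat16x4_t b) {
      union { bfloat16x4_t in; float32x2_t out; } u;
      u.in = b;
      return u.out;
    }

    // New style: a direct bit cast, which lowers to the plain bitcast
    // chains the regenerated CHECK lines match.
    static inline float32x2_t reinterpret_new(bfloat16x4_t b) {
      return __builtin_bit_cast(float32x2_t, b);
    }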
diff --git a/clang/test/CodeGen/arm-bf16-getset-intrinsics.c b/clang/test/CodeGen/arm-bf16-getset-intrinsics.c
index b87d0e8eb68bb..97d51839e2eb6 100644
--- a/clang/test/CodeGen/arm-bf16-getset-intrinsics.c
+++ b/clang/test/CodeGen/arm-bf16-getset-intrinsics.c
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi hard \
-// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi soft \
-// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -47,9 +47,10 @@ bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
// CHECK-LABEL: @test_vdup_lane_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
//
bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
@@ -58,9 +59,10 @@ bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
// CHECK-LABEL: @test_vdupq_lane_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP2]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
//
bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
@@ -69,9 +71,10 @@ bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
// CHECK-LABEL: @test_vdup_laneq_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
//
bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
@@ -80,9 +83,10 @@ bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
// CHECK-LABEL: @test_vdupq_laneq_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x bfloat>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP2]], <8 x bfloat> [[TMP2]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
//
bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
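For readers skimming these updated checks: the only change in this file is that the bfloat vector is now routed through a same-width integer vector (<4 x i16> or <8 x i16>) before the generic <8 x i8>/<16 x i8> cast; the splat shufflevector itself is unchanged. A minimal usage sketch of the intrinsic under test, assuming a target with the bf16 extension enabled (helper name is illustrative, not from the patch):

#include <arm_neon.h>

// Broadcast lane 1 of v to all four result lanes; this is exactly the
// splat shufflevector that the checks above match.
bfloat16x4_t broadcast_lane1(bfloat16x4_t v) {
  return vdup_lane_bf16(v, 1);
}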
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index 63ec016b49a0c..be587ea8e697a 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -1,130 +1,353 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 \
// RUN: -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | \
-// RUN: opt -S -passes=mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s
+// RUN: opt -S -passes=mem2reg,sroa | FileCheck -check-prefixes=CHECK,CHECK-A32 %s
// RUN: %clang_cc1 -triple arm64-linux-gnueabihf -target-feature +neon \
// RUN: -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | \
-// RUN: opt -S -passes=mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A64 %s
+// RUN: opt -S -passes=mem2reg,sroa | FileCheck -check-prefixes=CHECK,CHECK-A64 %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrnda_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDA_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrnda_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRNDA_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRNDA_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> [[VRNDA_V_I]])
+// CHECK-A32-NEXT: [[VRNDA_V2_I:%.*]] = bitcast <2 x float> [[VRNDA_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDA_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrnda_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[VRNDA_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDA1_I]]
+//
float32x2_t test_vrnda_f32(float32x2_t a) {
return vrnda_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndaq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDAQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndaq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDAQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDAQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> [[VRNDAQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDAQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDAQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDAQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndaq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[VRNDA_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDA1_I]]
+//
float32x4_t test_vrndaq_f32(float32x4_t a) {
return vrndaq_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndm_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDM_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrndm_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]])
+// CHECK-A32-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrndm_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDM1_I]]
+//
float32x2_t test_vrndm_f32(float32x2_t a) {
return vrndm_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndmq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDMQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndmq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndmq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDM_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDM1_I]]
+//
float32x4_t test_vrndmq_f32(float32x4_t a) {
return vrndmq_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndn_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDN_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrndn_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]])
+// CHECK-A32-NEXT: [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrndn_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> [[VRNDN_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDN1_I]]
+//
float32x2_t test_vrndn_f32(float32x2_t a) {
return vrndn_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndnq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDNQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndnq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndnq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VRNDN_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDN1_I]]
+//
float32x4_t test_vrndnq_f32(float32x4_t a) {
return vrndnq_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndp_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDP_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrndp_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRNDP_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRNDP_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> [[VRNDP_V_I]])
+// CHECK-A32-NEXT: [[VRNDP_V2_I:%.*]] = bitcast <2 x float> [[VRNDP_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDP_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrndp_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[VRNDP_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDP1_I]]
+//
float32x2_t test_vrndp_f32(float32x2_t a) {
return vrndp_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndpq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDPQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndpq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDPQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDPQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> [[VRNDPQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDPQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDPQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDPQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndpq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VRNDP_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDP1_I]]
+//
float32x4_t test_vrndpq_f32(float32x4_t a) {
return vrndpq_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndx_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDX_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrndx_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRNDX_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRNDX_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> [[VRNDX_V_I]])
+// CHECK-A32-NEXT: [[VRNDX_V2_I:%.*]] = bitcast <2 x float> [[VRNDX_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDX_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrndx_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[VRNDX_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDX1_I]]
+//
float32x2_t test_vrndx_f32(float32x2_t a) {
return vrndx_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndxq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDXQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndxq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDXQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> [[VRNDXQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDXQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDXQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDXQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndxq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[VRNDX_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDX1_I]]
+//
float32x4_t test_vrndxq_f32(float32x4_t a) {
return vrndxq_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrnd_f32(<2 x float> noundef %a)
-// CHECK-A32: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRND_V1_I]]
+// CHECK-A32-LABEL: define dso_local <2 x float> @test_vrnd_f32(
+// CHECK-A32-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[VRND_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A32-NEXT: [[VRND_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> [[VRND_V_I]])
+// CHECK-A32-NEXT: [[VRND_V2_I:%.*]] = bitcast <2 x float> [[VRND_V1_I]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRND_V2_I]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: ret <2 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <2 x float> @test_vrnd_f32(
+// CHECK-A64-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-A64-NEXT: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[VRNDZ_I]])
+// CHECK-A64-NEXT: ret <2 x float> [[VRNDZ1_I]]
+//
float32x2_t test_vrnd_f32(float32x2_t a) {
return vrnd_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndq_f32(<4 x float> noundef %a)
-// CHECK-A32: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDQ_V1_I]]
+// CHECK-A32-LABEL: define dso_local <4 x float> @test_vrndq_f32(
+// CHECK-A32-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A32-NEXT: [[VRNDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[VRNDQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> [[VRNDQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDQ_V1_I]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDQ_V2_I]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: ret <4 x float> [[TMP3]]
+//
+// CHECK-A64-LABEL: define dso_local <4 x float> @test_vrndq_f32(
+// CHECK-A64-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A64-NEXT: [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VRNDZ_I]])
+// CHECK-A64-NEXT: ret <4 x float> [[VRNDZ1_I]]
+//
float32x4_t test_vrndq_f32(float32x4_t a) {
return vrndq_f32(a);
}
-// CHECK-LABEL: define{{.*}} float @test_vrndns_f32(float noundef %a)
-// CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a)
-// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float %a)
-// CHECK: ret float [[VRNDN_I]]
+// CHECK-A32-LABEL: define dso_local float @test_vrndns_f32(
+// CHECK-A32-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float [[A]])
+// CHECK-A32-NEXT: ret float [[VRNDN_I]]
+//
+// CHECK-A64-LABEL: define dso_local float @test_vrndns_f32(
+// CHECK-A64-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
+// CHECK-A64-NEXT: ret float [[VRNDN_I]]
+//
float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);
}
-// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndi_f32(<2 x float> noundef %a)
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRNDI1_I]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vrndi_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRNDI_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRNDI_V1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[VRNDI_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VRNDI_V1_I]]
+//
float32x2_t test_vrndi_f32(float32x2_t a) {
return vrndi_f32(a);
}
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndiq_f32(<4 x float> noundef %a)
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRNDI1_I]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vrndiq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRNDIQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRNDIQ_V1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VRNDIQ_V_I]])
+// CHECK-NEXT: ret <4 x float> [[VRNDIQ_V1_I]]
+//
float32x4_t test_vrndiq_f32(float32x4_t a) {
return vrndiq_f32(a);
}
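The rounding intrinsics exercised in this file differ only in rounding mode, which the A64 lowering makes explicit through the generic LLVM intrinsics seen above (llvm.round, llvm.floor, llvm.roundeven, llvm.ceil, llvm.rint, llvm.trunc, llvm.nearbyint). A short sketch of two of them at the C level, with hypothetical helper names; per-lane behavior matches the corresponding scalar <math.h> functions:

#include <arm_neon.h>

// vrnda_f32: round to nearest, ties away from zero (roundf per lane).
float32x2_t round_ties_away(float32x2_t a) { return vrnda_f32(a); }

// vrndm_f32: round toward minus infinity (floorf per lane).
float32x2_t round_toward_minus_inf(float32x2_t a) { return vrndm_f32(a); }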
diff --git a/clang/test/CodeGen/arm-neon-fma.c b/clang/test/CodeGen/arm-neon-fma.c
index 682eda9750c81..663347010eae5 100644
--- a/clang/test/CodeGen/arm-neon-fma.c
+++ b/clang/test/CodeGen/arm-neon-fma.c
@@ -4,7 +4,7 @@
// RUN: -target-cpu cortex-a7 \
// RUN: -mfloat-abi hard \
// RUN: -ffreestanding \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -13,11 +13,17 @@
// CHECK-LABEL: define {{[^@]+}}@test_fma_order
// CHECK-SAME: (<2 x float> noundef [[ACCUM:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACCUM]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LHS]], <2 x float> [[RHS]], <2 x float> [[ACCUM]])
-// CHECK-NEXT: ret <2 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[ACCUM]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[LHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[RHS]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
return vfma_f32(accum, lhs, rhs);
@@ -26,11 +32,17 @@ float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs)
// CHECK-LABEL: define {{[^@]+}}@test_fmaq_order
// CHECK-SAME: (<4 x float> noundef [[ACCUM:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACCUM]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LHS]], <4 x float> [[RHS]], <4 x float> [[ACCUM]])
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[ACCUM]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
return vfmaq_f32(accum, lhs, rhs);
@@ -41,11 +53,17 @@ float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[N]], i32 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[N]], i32 1
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[B]], <2 x float> [[VECINIT1_I]], <2 x float> [[A]])
-// CHECK-NEXT: ret <2 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
//
float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
return vfma_n_f32(a, b, n);
@@ -58,11 +76,17 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[N]], i32 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[N]], i32 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[N]], i32 3
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[VECINIT3_I]], <4 x float> [[A]])
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
//
float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
return vfmaq_n_f32(a, b, n);
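As the test_fma_order checks above make visible, the accumulator comes first in the ACLE signature but last in llvm.fma, which is why the IR call reads @llvm.fma.v2f32(lhs, rhs, accum). A brief usage sketch (helper names are illustrative):

#include <arm_neon.h>

// Computes acc + x*y per lane with a single rounding step.
float32x2_t fused_madd(float32x2_t acc, float32x2_t x, float32x2_t y) {
  return vfma_f32(acc, x, y);
}

// The _n form splats the scalar n across lanes first: acc + x*n.
float32x2_t fused_madd_n(float32x2_t acc, float32x2_t x, float32_t n) {
  return vfma_n_f32(acc, x, n);
}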
diff --git a/clang/test/CodeGen/arm-neon-numeric-maxmin.c b/clang/test/CodeGen/arm-neon-numeric-maxmin.c
index d2d4fee10f079..0b76f3022466f 100644
--- a/clang/test/CodeGen/arm-neon-numeric-maxmin.c
+++ b/clang/test/CodeGen/arm-neon-numeric-maxmin.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
-// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -8,24 +8,36 @@
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnm_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> [[A]], <2 x float> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAXNM_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMAXNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> [[VMAXNM_V_I]], <2 x float> [[VMAXNM_V1_I]])
// CHECK-NEXT: [[VMAXNM_V3_I:%.*]] = bitcast <2 x float> [[VMAXNM_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <2 x float> [[VMAXNM_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VMAXNM_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
//
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
return vmaxnm_f32(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vmaxnmq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> [[A]], <4 x float> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAXNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMAXNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> [[VMAXNMQ_V_I]], <4 x float> [[VMAXNMQ_V1_I]])
// CHECK-NEXT: [[VMAXNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXNMQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VMAXNMQ_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VMAXNMQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
return vmaxnmq_f32(a, b);
@@ -34,24 +46,36 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vminnm_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <8 x i8>
-// CHECK-NEXT: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> [[A]], <2 x float> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMINNM_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMINNM_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> [[VMINNM_V_I]], <2 x float> [[VMINNM_V1_I]])
// CHECK-NEXT: [[VMINNM_V3_I:%.*]] = bitcast <2 x float> [[VMINNM_V2_I]] to <8 x i8>
-// CHECK-NEXT: ret <2 x float> [[VMINNM_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VMINNM_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
//
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
return vminnm_f32(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vminnmq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <16 x i8>
-// CHECK-NEXT: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> [[A]], <4 x float> [[B]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMINNMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMINNMQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> [[VMINNMQ_V_I]], <4 x float> [[VMINNMQ_V1_I]])
// CHECK-NEXT: [[VMINNMQ_V3_I:%.*]] = bitcast <4 x float> [[VMINNMQ_V2_I]] to <16 x i8>
-// CHECK-NEXT: ret <4 x float> [[VMINNMQ_V2_I]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VMINNMQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
return vminnmq_f32(a, b);
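The vmaxnm/vminnm intrinsics tested here implement IEEE 754-2008 maxNum/minNum semantics: when exactly one operand lane is a quiet NaN, the numeric operand is returned rather than NaN. A minimal sketch (illustrative helper name):

#include <arm_neon.h>

// Lane-wise maxNum: a NaN lane loses to a numeric lane.
float32x2_t lanewise_maxnum(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}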
diff --git a/clang/test/CodeGen/arm-neon-vcvtX.c b/clang/test/CodeGen/arm-neon-vcvtX.c
index c087b92102c5b..17dd97467308b 100644
--- a/clang/test/CodeGen/arm-neon-vcvtX.c
+++ b/clang/test/CodeGen/arm-neon-vcvtX.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
-// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
@@ -8,8 +8,10 @@
// CHECK-LABEL: define {{[^@]+}}@test_vcvta_s32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> [[VCVTA_S32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTA_S32_V1_I]]
//
int32x2_t test_vcvta_s32_f32(float32x2_t a) {
@@ -19,8 +21,10 @@ int32x2_t test_vcvta_s32_f32(float32x2_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvta_u32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTA_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> [[VCVTA_U32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTA_U32_V1_I]]
//
uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
@@ -28,10 +32,12 @@ uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtaq_s32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTAQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> [[VCVTAQ_S32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTAQ_S32_V1_I]]
//
int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
@@ -39,10 +45,12 @@ int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtaq_u32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTAQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> [[VCVTAQ_U32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTAQ_U32_V1_I]]
//
uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
@@ -52,8 +60,10 @@ uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtn_s32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> [[VCVTN_S32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTN_S32_V1_I]]
//
int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
@@ -63,8 +73,10 @@ int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtn_u32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTN_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_U32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTN_U32_V1_I]]
//
uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
@@ -72,10 +84,12 @@ uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtnq_s32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTNQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> [[VCVTNQ_S32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTNQ_S32_V1_I]]
//
int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
@@ -83,10 +97,12 @@ int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtnq_u32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTNQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> [[VCVTNQ_U32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTNQ_U32_V1_I]]
//
uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
@@ -96,8 +112,10 @@ uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtp_s32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> [[VCVTP_S32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTP_S32_V1_I]]
//
int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
@@ -107,8 +125,10 @@ int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtp_u32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTP_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_U32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTP_U32_V1_I]]
//
uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
@@ -116,10 +136,12 @@ uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtpq_s32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTPQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> [[VCVTPQ_S32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTPQ_S32_V1_I]]
//
int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
@@ -127,10 +149,12 @@ int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtpq_u32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTPQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> [[VCVTPQ_U32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTPQ_U32_V1_I]]
//
uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
@@ -140,8 +164,10 @@ uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtm_s32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_S32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> [[VCVTM_S32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTM_S32_V1_I]]
//
int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
@@ -151,8 +177,10 @@ int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
// CHECK-LABEL: define {{[^@]+}}@test_vcvtm_u32_f32
// CHECK-SAME: (<2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <8 x i8>
-// CHECK-NEXT: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVTM_U32_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_U32_V_I]])
// CHECK-NEXT: ret <2 x i32> [[VCVTM_U32_V1_I]]
//
uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
@@ -160,10 +188,12 @@ uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtmq_s32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTMQ_S32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> [[VCVTMQ_S32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTMQ_S32_V1_I]]
//
int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
@@ -171,10 +201,12 @@ int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
}
// CHECK-LABEL: define {{[^@]+}}@test_vcvtmq_u32_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <16 x i8>
-// CHECK-NEXT: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> [[A]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVTMQ_U32_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> [[VCVTMQ_U32_V_I]])
// CHECK-NEXT: ret <4 x i32> [[VCVTMQ_U32_V1_I]]
//
uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
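The vcvt{a,n,p,m} conversions in this file differ only in rounding mode: a rounds to nearest with ties away from zero, n to nearest even, p toward +infinity, m toward -infinity, each in signed (_s32) and unsigned (_u32) forms. A short sketch with illustrative helper names:

#include <arm_neon.h>

// Convert to signed i32 lanes, rounding to nearest even.
int32x2_t to_s32_nearest_even(float32x2_t a) { return vcvtn_s32_f32(a); }

// Convert to unsigned u32 lanes, rounding toward minus infinity.
uint32x2_t to_u32_floor(float32x2_t a) { return vcvtm_u32_f32(a); }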
diff --git a/clang/test/CodeGen/arm-neon-vst.c b/clang/test/CodeGen/arm-neon-vst.c
index 6e135ab07af49..eeff0c00221b2 100644
--- a/clang/test/CodeGen/arm-neon-vst.c
+++ b/clang/test/CodeGen/arm-neon-vst.c
@@ -1,1990 +1,2738 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | \
-// RUN: FileCheck -check-prefixes=CHECK,CHECK-A64 %s
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | \
+// RUN: FileCheck -check-prefixes=CHECK-A64 %s
// RUN: %clang_cc1 -triple armv8-none-linux-gnueabi -target-feature +neon \
// RUN: -target-feature +fp16 -disable-O0-optnone -emit-llvm -o - %s | \
-// RUN: opt -S -passes=mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s
+// RUN: opt -S -passes=mem2reg,sroa | FileCheck -check-prefixes=CHECK-A32 %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vst1_f16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF:(half|i16)]]>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4f16.p0(<4 x half> [[TMP4]], <4 x half> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
vst1_f16_x2(a, b);
}
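
For readers skimming the new checks: on AArch64 the coerced <4 x half> lanes are bitcast to <4 x i16> and straight back to <4 x half> before the st1x2 call (a round-trip the optimizer can fold), while the AArch32 intrinsic genuinely takes <4 x i16> lanes. A minimal usage sketch of the intrinsic under test follows (editorial, not part of the patch; the helper name store_f16_pair is invented for illustration):

#include <arm_neon.h>

/* Store two d-registers' worth of fp16 lanes to consecutive memory,
   exercising the vst1_f16_x2 form whose codegen is checked above. */
void store_f16_pair(float16_t *dst, float16x4_t lo, float16x4_t hi) {
  float16x4x2_t pair;
  pair.val[0] = lo; /* written to dst[0..3] */
  pair.val[1] = hi; /* written to dst[4..7] */
  vst1_f16_x2(dst, pair);
}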
-// CHECK-LABEL: @test_vst1_f16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF]]>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr %a, <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x half>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4f16.p0(<4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
vst1_f16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_f16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF]]>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x [[HALF]]>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr %a, <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x half>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x half>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-A64-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x half>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
vst1_f16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_f32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v2f32(ptr %a, <2 x float> [[TMP7]], <2 x float> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> [[TMP4]], <2 x float> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v2f32(ptr [[A]], <2 x float> [[TMP4]], <2 x float> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
vst1_f32_x2(a, b);
}
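
In contrast with the fp16 case, the f32 variant keeps its element type end to end: both llvm.aarch64.neon.st1x2.v2f32 and llvm.arm.neon.vst1x2.p0.v2f32 take <2 x float> lanes, so the <8 x i8> round-trip in the checks above is a no-op that folds away. A usage sketch (editorial; store_f32_pair is an invented name):

#include <arm_neon.h>

/* Store two float32x2 vectors back to back, matching the
   vst1_f32_x2 codegen checked above. */
void store_f32_pair(float32_t *dst, float32x2_t lo, float32x2_t hi) {
  float32x2x2_t pair = { { lo, hi } }; /* .val[0] = lo, .val[1] = hi */
  vst1_f32_x2(dst, pair);              /* dst[0..1] = lo, dst[2..3] = hi */
}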
-// CHECK-LABEL: @test_vst1_f32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v2f32(ptr %a, <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x float>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v2f32(ptr [[A]], <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
vst1_f32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_f32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v2f32(ptr %a, <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_f32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <2 x float>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x float>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-A64-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_f32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v2f32(ptr [[A]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) {
vst1_f32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_p16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
vst1_p16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_p16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr %a, <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
vst1_p16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_p16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr %a, <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) {
vst1_p16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_p8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
vst1_p8_x2(a, b);
}
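
The 8-bit variants are the degenerate case: <8 x i8> is already the lane type the intrinsics expect, so the AArch64 path needs no bitcasts at all and the AArch32 path only splits the [2 x i64] argument coercion. A sketch for completeness (editorial; store_p8_pair is an invented name):

#include <arm_neon.h>

/* Store two poly8x8 vectors with the x2 form; 16 bytes are written,
   lo first, then hi. */
void store_p8_pair(poly8_t *dst, poly8x8_t lo, poly8x8_t hi) {
  poly8x8x2_t pair = { { lo, hi } };
  vst1_p8_x2(dst, pair);
}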
-// CHECK-LABEL: @test_vst1_p8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
vst1_p8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_p8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_p8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_p8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) {
vst1_p8_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_s16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
vst1_s16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_s16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr %a, <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
vst1_s16_x3(a, b);
}
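The hunks above show the shape of the change these regenerated checks capture for the vst1_xN wrappers: the old codegen spilled the coerced aggregate argument to an alloca, memcpy'd it, and reloaded each lane through getelementptr, while the new codegen extracts the lanes directly with extractvalue and only round-trips them through <8 x i8> bitcasts. As a minimal editorial sketch (not part of the patch; @old_shape and @new_shape are hypothetical names), the two lowerings look roughly like:

; before: coerced [2 x <4 x i16>] argument takes a round trip through memory
declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)

define void @old_shape(ptr %a, [2 x <4 x i16>] %b.coerce) {
entry:
  %b = alloca [2 x <4 x i16>], align 8
  store [2 x <4 x i16>] %b.coerce, ptr %b, align 8
  %idx0 = getelementptr inbounds [2 x <4 x i16>], ptr %b, i64 0, i64 0
  %v0 = load <4 x i16>, ptr %idx0, align 8
  %idx1 = getelementptr inbounds [2 x <4 x i16>], ptr %b, i64 0, i64 1
  %v1 = load <4 x i16>, ptr %idx1, align 8
  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %v0, <4 x i16> %v1, ptr %a)
  ret void
}

; after: lanes are extracted in registers, no alloca or memcpy
define void @new_shape(ptr %a, [2 x <4 x i16>] %b.coerce) {
entry:
  %e0 = extractvalue [2 x <4 x i16>] %b.coerce, 0
  %e1 = extractvalue [2 x <4 x i16>] %b.coerce, 1
  %t0 = bitcast <4 x i16> %e0 to <8 x i8>
  %t1 = bitcast <4 x i16> %e1 to <8 x i8>
  %t2 = bitcast <8 x i8> %t0 to <4 x i16>
  %t3 = bitcast <8 x i8> %t1 to <4 x i16>
  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %t2, <4 x i16> %t3, ptr %a)
  ret void
}

The back-to-back <4 x i16> -> <8 x i8> -> <4 x i16> bitcast pairs visible in the new checks are redundant at -O0 and would presumably be folded by later passes; they simply mirror the cast the arm_neon.h wrappers perform.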
-// CHECK-LABEL: @test_vst1_s16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr %a, <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
vst1_s16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_s32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v2i32(ptr %a, <2 x i32> [[TMP7]], <2 x i32> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
vst1_s32_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_s32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v2i32(ptr %a, <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
vst1_s32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_s32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v2i32(ptr %a, <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
vst1_s32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_s64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v1i64(ptr %a, <1 x i64> [[TMP7]], <1 x i64> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s64_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s64_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v1i64(ptr [[A]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
vst1_s64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_s64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v1i64(ptr %a, <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s64_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s64_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v1i64(ptr [[A]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
vst1_s64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_s64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v1i64(ptr %a, <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s64_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s64_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v1i64(ptr [[A]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
vst1_s64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_s8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
vst1_s8_x2(a, b);
}
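For the 8-bit element types just above, note that the A64 path needs no bitcast round trip at all, since the lane type is already <8 x i8>; only the A32 path converts its coerced i64 lanes. A sketch of that A32 shape (@a32_shape is a hypothetical name, mirroring the CHECK-A32 lines above):

declare void @llvm.arm.neon.vst1x2.p0.v8i8(ptr, <8 x i8>, <8 x i8>)

define void @a32_shape(ptr %a, [2 x i64] %b.coerce) {
entry:
  %e0 = extractvalue [2 x i64] %b.coerce, 0
  %v0 = bitcast i64 %e0 to <8 x i8>
  %e1 = extractvalue [2 x i64] %b.coerce, 1
  %v1 = bitcast i64 %e1 to <8 x i8>
  call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr %a, <8 x i8> %v0, <8 x i8> %v1)
  ret void
}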
-// CHECK-LABEL: @test_vst1_s8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
vst1_s8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_s8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_s8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_s8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
vst1_s8_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_u16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
vst1_u16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_u16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr %a, <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
vst1_u16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_u16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr %a, <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x i16>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
vst1_u16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_u32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v2i32(ptr %a, <2 x i32> [[TMP7]], <2 x i32> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
vst1_u32_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_u32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v2i32(ptr %a, <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
vst1_u32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_u32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v2i32(ptr %a, <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <2 x i32>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i32>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
vst1_u32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_u64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v1i64(ptr %a, <1 x i64> [[TMP7]], <1 x i64> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u64_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u64_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v1i64(ptr [[A]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
vst1_u64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_u64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v1i64(ptr %a, <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u64_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u64_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v1i64(ptr [[A]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
vst1_u64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_u64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v1i64(ptr %a, <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u64_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <1 x i64>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <1 x i64>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP4]], <1 x i64> [[TMP5]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u64_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK-A32-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK-A32-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v1i64(ptr [[A]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
vst1_u64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1_u8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [2 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
vst1_u8_x2(a, b);
}
-// CHECK-LABEL: @test_vst1_u8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [3 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
vst1_u8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1_u8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[__S1]], ptr align 8 [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1_u8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1_u8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
vst1_u8_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_f16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align [[QALIGN:(16|8)]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr %a, <8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8f16.p0(<8 x half> [[TMP4]], <8 x half> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
vst1q_f16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_f16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr %a, <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x half>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8f16.p0(<8 x half> [[TMP6]], <8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
vst1q_f16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_f16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x [[HALF]]>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x [[HALF]]>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr %a, <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x half>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_0_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_1_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_2_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x half>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[B_COERCE_FCA_3_EXTRACT]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// CHECK-A64-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
vst1q_f16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_f32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4f32(ptr %a, <4 x float> [[TMP7]], <4 x float> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> [[TMP4]], <4 x float> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4f32(ptr [[A]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
vst1q_f32_x2(a, b);
}
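
For readers skimming the generated checks: vst1q_f32_x2 takes a float32x4x2_t and stores both q-registers to 32 contiguous bytes, which is what the st1x2/vst1x2 intrinsic calls above express. A minimal usage sketch (illustrative only, not part of the patch; the helper name store_pair and its arguments are invented for the example):

  #include <arm_neon.h>

  /* Stores the lanes of lo at dst[0..3] and hi at dst[4..7],
     i.e. 32 contiguous bytes starting at dst. */
  static void store_pair(float32_t *dst, float32x4_t lo, float32x4_t hi) {
    float32x4x2_t pair = { { lo, hi } };
    vst1q_f32_x2(dst, pair);
  }

With this patch the A64 lowering above reaches llvm.aarch64.neon.st1x2.v4f32 through extractvalue-plus-bitcast chains on the coerced argument, rather than the alloca/memcpy round-trip shown in the deleted checks.
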
-// CHECK-LABEL: @test_vst1q_f32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4f32(ptr %a, <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x float>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> [[TMP6]], <4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4f32(ptr [[A]], <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
vst1q_f32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_f32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4f32(ptr %a, <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_f32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x float>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_0_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_1_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_2_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x float>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[B_COERCE_FCA_3_EXTRACT]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// CHECK-A64-NEXT: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_f32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4f32(ptr [[A]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
vst1q_f32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_p16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr %a, <8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
vst1q_p16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_p16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr %a, <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
vst1q_p16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_p16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr %a, <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
vst1q_p16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_p8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
vst1q_p8_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_p8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
vst1q_p8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_p8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_p8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_p8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
vst1q_p8_x4(a, b);
}
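
Note that for the poly8 variants the element type is already <16 x i8>, so the A64 checks above pass the extracted values straight to llvm.aarch64.neon.st1x2/st1x3/st1x4.v16i8 with no bitcast round-trip at all. Semantically, the _x2 form is just two consecutive q-register stores; a hedged hand-written equivalent (illustrative only, not part of the patch; the helper name is invented):

  #include <arm_neon.h>

  /* Equivalent to vst1q_p8_x2(dst, v): val[0] goes to dst[0..15] and
     val[1] to dst[16..31]. Using the intrinsic instead lets the AArch64
     backend fold the pair into a single two-register ST1. */
  static void store_p8_x2_by_hand(poly8_t *dst, poly8x16x2_t v) {
    vst1q_p8(dst, v.val[0]);
    vst1q_p8(dst + 16, v.val[1]);
  }
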
-// CHECK-LABEL: @test_vst1q_s16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr %a, <8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
vst1q_s16_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_s16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr %a, <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
vst1q_s16_x3(a, b);
}
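
For orientation (the same pattern repeats for every element type below): `vst1q_s16_x3` stores three `int16x8_t` values to 48 contiguous bytes at `a`. A minimal sketch of that memory effect, assuming the usual contiguous `val[]` layout; the helper name is hypothetical, and the real header lowers to the single `st1x3`/`vst1x3` call checked above:

#include <arm_neon.h>
#include <string.h>

/* Illustrative-only helper (not from the patch): the memory effect of
   vst1q_s16_x3 is three contiguous 16-byte stores, which the compiler
   emits as the st1x3/vst1x3 intrinsic call checked above. */
static void vst1q_s16_x3_sketch(int16_t *a, int16x8x3_t b) {
  memcpy(a,      &b.val[0], sizeof(int16x8_t)); /* lanes 0..7   */
  memcpy(a + 8,  &b.val[1], sizeof(int16x8_t)); /* lanes 8..15  */
  memcpy(a + 16, &b.val[2], sizeof(int16x8_t)); /* lanes 16..23 */
}
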
-// CHECK-LABEL: @test_vst1q_s16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr %a, <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
vst1q_s16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_s32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i32(ptr %a, <4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
vst1q_s32_x2(a, b);
}
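
The CHECK-A32 lines above show the 32-bit AAPCS side: `int32x4x2_t` arrives coerced to `[4 x i64]`, and each pair of `i64` values is reassembled into a `<2 x i64>` before the cast to the element type. Roughly, in terms of public intrinsics (a sketch with a hypothetical helper name, assuming little-endian lane order):

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative-only helper: how the two i64 halves of each coerced
   q-register are rebuilt, mirroring the insertelement pairs and the
   final bitcast in the CHECK-A32 lines. */
static int32x4_t reassemble_q_sketch(uint64_t lo, uint64_t hi) {
  uint64x2_t v = vdupq_n_u64(0);    /* the IR starts from undef        */
  v = vsetq_lane_u64(lo, v, 0);     /* first  extractvalue -> lane 0   */
  v = vsetq_lane_u64(hi, v, 1);     /* second extractvalue -> lane 1   */
  return vreinterpretq_s32_u64(v);  /* <2 x i64> -> <4 x i32> cast     */
}
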
-// CHECK-LABEL: @test_vst1q_s32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i32(ptr %a, <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
vst1q_s32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_s32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i32(ptr %a, <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
vst1q_s32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_s64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v2i64(ptr %a, <2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s64_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s64_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v2i64(ptr [[A]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
vst1q_s64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_s64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v2i64(ptr %a, <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s64_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s64_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v2i64(ptr [[A]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
vst1q_s64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_s64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v2i64(ptr %a, <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s64_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s64_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v2i64(ptr [[A]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
vst1q_s64_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_s8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
vst1q_s8_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_s8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
vst1q_s8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_s8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_s8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_s8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
vst1q_s8_x4(a, b);
}
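
Note the contrast in the `s8` functions above: `<16 x i8>` is itself the byte type the header casts through, so the A64 checks feed the coerced vectors straight to `st1x2`/`st1x3`/`st1x4` with no bitcast pair. Expressed with the public reinterpret intrinsics, the round-trip that the wider element types keep (a sketch; the helper name is hypothetical):

#include <arm_neon.h>

/* Illustrative-only helper: the <16 x i8> round-trip visible in the s16
   checks. For byte-element vectors it degenerates to the identity, which
   is why the s8 tests above contain no bitcasts on the A64 path. */
static int16x8_t byte_roundtrip_sketch(int16x8_t v) {
  uint8x16_t bytes = vreinterpretq_u8_s16(v); /* <8 x i16> -> <16 x i8> */
  return vreinterpretq_s16_u8(bytes);         /* <16 x i8> -> <8 x i16> */
}
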
-// CHECK-LABEL: @test_vst1q_u16_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr %a, <8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u16_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u16_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
vst1q_u16_x2(a, b);
}
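(For reference, a semantically equivalent formulation of the intrinsic under test, not from this patch; the helper name is hypothetical. The _x2 store is contiguous, not interleaved.)

  #include <arm_neon.h>

  /* vst1q_u16_x2(a, b) stores b.val[0] then b.val[1] back to back. */
  void store_pair_ref(uint16_t *a, uint16x8x2_t b) {
      vst1q_u16(a, b.val[0]);      /* elements a[0..7]  */
      vst1q_u16(a + 8, b.val[1]);  /* elements a[8..15] */
  }
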
-// CHECK-LABEL: @test_vst1q_u16_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr %a, <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u16_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u16_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
vst1q_u16_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_u16_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr %a, <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u16_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i16>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u16_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
vst1q_u16_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_u32_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v4i32(ptr %a, <4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u32_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u32_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
vst1q_u32_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_u32_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v4i32(ptr %a, <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u32_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u32_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
vst1q_u32_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_u32_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v4i32(ptr %a, <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u32_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <4 x i32>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <4 x i32>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u32_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
vst1q_u32_x4(a, b);
}
-// CHECK-LABEL: @test_vst1q_u64_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v2i64(ptr %a, <2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u64_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u64_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v2i64(ptr [[A]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
vst1q_u64_x2(a, b);
}
-// CHECK-LABEL: @test_vst1q_u64_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v2i64(ptr %a, <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u64_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u64_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v2i64(ptr [[A]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
vst1q_u64_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_u64_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
-// CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
-// CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
-// CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v2i64(ptr %a, <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u64_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <2 x i64>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <2 x i64>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_0_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_2_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_COERCE_FCA_3_EXTRACT]] to <16 x i8>
+// CHECK-A64-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A64-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u64_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-A32-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v2i64(ptr [[A]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP7]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
vst1q_u64_x4(a, b);
}
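(An illustrative sketch, not from this patch; the helper name is hypothetical. Pairing the _x4 load and store intrinsics gives a 64-byte block copy.)

  #include <arm_neon.h>

  /* Copy 64 bytes (eight u64 lanes) via one x4 load and one x4 store. */
  void copy64(uint64_t *dst, const uint64_t *src) {
      uint64x2x4_t t = vld1q_u64_x4(src);
      vst1q_u64_x4(dst, t);
  }
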
-// CHECK-LABEL: @test_vst1q_u8_x2(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [4 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u8_x2(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u8_x2(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
vst1q_u8_x2(a, b);
}
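(An illustrative contrast, not from this patch; the helper name is hypothetical. Note that the byte-element tests need no bitcasts, since <16 x i8> is already the intrinsic's operand type in the IR above, and that the st1x2 family does not interleave the way st2 does.)

  #include <arm_neon.h>

  void contrast(uint8_t *dst, uint8x16x2_t b) {
      vst1q_u8_x2(dst, b);  /* b.val[0][0..15] then b.val[1][0..15], contiguous   */
      /* vst2q_u8(dst, b); would interleave: val[0][0], val[1][0], val[0][1], ... */
  }
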
-// CHECK-LABEL: @test_vst1q_u8_x3(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [6 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u8_x3(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u8_x3(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
vst1q_u8_x3(a, b);
}
-// CHECK-LABEL: @test_vst1q_u8_x4(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align [[QALIGN]]
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align [[QALIGN]]
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK-A32: store [8 x i64] %b.coerce, ptr %coerce.dive, align 8
-// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align [[QALIGN]] [[__S1]], ptr align [[QALIGN]] [[B]], {{i64|i32}} 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align [[QALIGN]]
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align [[QALIGN]]
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align [[QALIGN]]
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align [[QALIGN]]
-// CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
-// CHECK-A32: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-// CHECK: ret void
+// CHECK-A64-LABEL: define dso_local void @test_vst1q_u8_x4(
+// CHECK-A64-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A64-NEXT: [[ENTRY:.*:]]
+// CHECK-A64-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-A64-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-A64-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-A64-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-A64-NEXT: call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], ptr [[A]])
+// CHECK-A64-NEXT: ret void
+//
+// CHECK-A32-LABEL: define dso_local void @test_vst1q_u8_x4(
+// CHECK-A32-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-A32-NEXT: [[ENTRY:.*:]]
+// CHECK-A32-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-A32-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-A32-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-A32-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-A32-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-A32-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-A32-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-A32-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-A32-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-A32-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-A32-NEXT: call void @llvm.arm.neon.vst1x4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-A32-NEXT: ret void
+//
void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
vst1q_u8_x4(a, b);
}
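
The A32 checks above make the ABI coercion explicit: a uint8x16x4_t argument arrives as a flat [8 x i64], each pair of i64 values is reassembled into a <2 x i64> with insertelement, and the result is bitcast to <16 x i8> before the vst1x4 call. A minimal C model of that reassembly, assuming a little-endian target (the helper name is hypothetical, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Rebuild one 16-byte q-register payload from the two i64 halves the
       A32 ABI passes; mirrors the insertelement + bitcast pair in the
       checks above (little-endian lane order assumed). */
    static void rebuild_q_reg(uint8_t out[16], uint64_t lo, uint64_t hi) {
      memcpy(out, &lo, 8);     /* lane 0 of the <2 x i64> */
      memcpy(out + 8, &hi, 8); /* lane 1; the buffer is the <16 x i8> view */
    }
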
diff --git a/clang/test/CodeGen/arm64-vrnd-constrained.c b/clang/test/CodeGen/arm64-vrnd-constrained.c
index ccf729a6a25ef..8e61f1ea6a3d0 100644
--- a/clang/test/CodeGen/arm64-vrnd-constrained.c
+++ b/clang/test/CodeGen/arm64-vrnd-constrained.c
@@ -1,43 +1,210 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -emit-llvm -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=UNCONSTRAINED %s
+// RUN: | FileCheck --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -ffp-exception-behavior=strict -emit-llvm -o - %s \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -emit-llvm -o - %s | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -ffp-exception-behavior=strict -emit-llvm -o - %s | llc -o=- - \
-// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: | FileCheck --check-prefix=CONSTRAINED %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
+// UNCONSTRAINED-LABEL: define <2 x double> @rnd5(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]])
+// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDZ1_I]], ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define <2 x double> @rnd5(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> [[VRNDZ_I]], metadata !"fpexcept.strict") #[[ATTR2:[0-9]+]]
+// CONSTRAINED-NEXT: store <2 x double> [[VRNDZ1_I]], ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
-// COMMON-LABEL: rnd5
-// UNCONSTRAINED: call <2 x double> @llvm.trunc.v2f64(<2 x double>
-// CONSTRAINED: call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>
-// CHECK-ASM: frintz.2d v{{[0-9]+}}, v{{[0-9]+}}
+// UNCONSTRAINED-LABEL: define <2 x double> @rnd13(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]])
+// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDM1_I]], ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define <2 x double> @rnd13(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> [[VRNDM_I]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: store <2 x double> [[VRNDM1_I]], ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
-// COMMON-LABEL: rnd13
-// UNCONSTRAINED: call <2 x double> @llvm.floor.v2f64(<2 x double>
-// CONSTRAINED: call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>
-// CHECK-ASM: frintm.2d v{{[0-9]+}}, v{{[0-9]+}}
+// UNCONSTRAINED-LABEL: define <2 x double> @rnd18(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]])
+// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDP1_I]], ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define <2 x double> @rnd18(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> [[VRNDP_I]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: store <2 x double> [[VRNDP1_I]], ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); }
-// COMMON-LABEL: rnd18
-// UNCONSTRAINED: call <2 x double> @llvm.ceil.v2f64(<2 x double>
-// CONSTRAINED: call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>
-// CHECK-ASM: frintp.2d v{{[0-9]+}}, v{{[0-9]+}}
+// UNCONSTRAINED-LABEL: define <2 x double> @rnd22(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]])
+// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDA1_I]], ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define <2 x double> @rnd22(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> [[VRNDA_I]], metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: store <2 x double> [[VRNDA1_I]], ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); }
-// COMMON-LABEL: rnd22
-// UNCONSTRAINED: call <2 x double> @llvm.round.v2f64(<2 x double>
-// CONSTRAINED: call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>
-// CHECK-ASM: frinta.2d v{{[0-9]+}}, v{{[0-9]+}}
+// UNCONSTRAINED-LABEL: define <2 x double> @rnd25(
+// UNCONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// UNCONSTRAINED-NEXT: [[ENTRY:.*:]]
+// UNCONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// UNCONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// UNCONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// UNCONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]])
+// UNCONSTRAINED-NEXT: store <2 x double> [[VRNDX1_I]], ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// UNCONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// UNCONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
+// CONSTRAINED-LABEL: define <2 x double> @rnd25(
+// CONSTRAINED-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CONSTRAINED-NEXT: [[ENTRY:.*:]]
+// CONSTRAINED-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CONSTRAINED-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CONSTRAINED-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CONSTRAINED-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CONSTRAINED-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> [[VRNDX_I]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR2]]
+// CONSTRAINED-NEXT: store <2 x double> [[VRNDX1_I]], ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CONSTRAINED-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CONSTRAINED-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); }
-// COMMON-LABEL: rnd25
-// UNCONSTRAINED: call <2 x double> @llvm.rint.v2f64(<2 x double>
-// CONSTRAINED: call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>
-// CHECK-ASM: frintx.2d v{{[0-9]+}}, v{{[0-9]+}}
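
Both RUN lines above compile the same source; the only divergence the two prefixes capture is the rounding lowering: plain @llvm.trunc/@llvm.floor/@llvm.ceil/@llvm.round/@llvm.rint by default, versus the @llvm.experimental.constrained.* forms with !"fpexcept.strict" under -ffp-exception-behavior=strict. A hedged sketch of the user-visible property strict mode is meant to preserve, using vrndxq_f64 (frintx), which signals Inexact when it actually rounds; whether the flag is observable also depends on the target honoring FENV_ACCESS:

    #include <arm_neon.h>
    #include <fenv.h>

    #pragma STDC FENV_ACCESS ON

    /* With -ffp-exception-behavior=strict the compiler may not assume the
       intrinsic is exception-free, so FE_INEXACT raised by frintx should
       remain visible here (sketch; assumes FENV_ACCESS is honored). */
    int rounds_inexactly(float64x2_t v) {
      feclearexcept(FE_ALL_EXCEPT);
      float64x2_t r = vrndxq_f64(v);
      (void)r;
      return fetestexcept(FE_INEXACT) != 0;
    }
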
diff --git a/clang/test/CodeGen/arm64-vrnd.c b/clang/test/CodeGen/arm64-vrnd.c
index 0059dc43abe0b..2ca4abafc4813 100644
--- a/clang/test/CodeGen/arm64-vrnd.c
+++ b/clang/test/CodeGen/arm64-vrnd.c
@@ -1,24 +1,133 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -emit-llvm -o - %s | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
+// CHECK-LABEL: define <2 x double> @rnd5(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDZ1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
-// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double>
+// CHECK-LABEL: define <2 x double> @rnd9(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> [[VRNDN_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDN1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); }
-// CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double>
+// CHECK-LABEL: define <2 x double> @rnd13(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDM1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
-// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double>
+// CHECK-LABEL: define <2 x double> @rnd18(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDP1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); }
-// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double>
+// CHECK-LABEL: define <2 x double> @rnd22(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDA1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); }
-// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
+// CHECK-LABEL: define <2 x double> @rnd25(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[__P0_ADDR_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[__RET_I:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-NEXT: [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// CHECK-NEXT: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]])
+// CHECK-NEXT: store <2 x double> [[VRNDX1_I]], ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[REF_TMP_I]], align 16
+// CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[__RET_I]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[__RET_I]], align 16
+// CHECK-NEXT: ret <2 x double> [[TMP3]]
+//
float64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); }
-// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double>
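
Worth noting among the mappings above: vrndnq_f64 lowers to @llvm.roundeven (FRINTN, ties-to-even), not @llvm.round (FRINTA, ties-away). A tiny sketch of the one kind of input where the two disagree (the values are ordinary IEEE-754 facts, not taken from the test):

    #include <arm_neon.h>

    /* 0.5 is the classic tie: roundeven gives 0.0, round-half-away gives 1.0. */
    float64x2_t ties(void) {
      float64x2_t half = vdupq_n_f64(0.5);
      float64x2_t to_even = vrndnq_f64(half); /* {0.0, 0.0} */
      float64x2_t away = vrndaq_f64(half);    /* {1.0, 1.0} */
      return vaddq_f64(to_even, away);        /* {1.0, 1.0} */
    }
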
diff --git a/clang/test/CodeGen/arm64_vcreate.c b/clang/test/CodeGen/arm64_vcreate.c
index 2b6e8e4439167..3641ebc4ec6a7 100644
--- a/clang/test/CodeGen/arm64_vcreate.c
+++ b/clang/test/CodeGen/arm64_vcreate.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -o - -emit-llvm %s | opt -S -passes=mem2reg | FileCheck %s
// Test ARM64 SIMD vcreate intrinsics
@@ -5,9 +6,23 @@
#include <arm_neon.h>
+// CHECK-LABEL: define <2 x float> @test_vcreate_f32(
+// CHECK-SAME: i64 noundef [[A1:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A1_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[__RET:%.*]] = alloca <2 x float>, align 8
+// CHECK-NEXT: [[__PROMOTE:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP:%.*]] = alloca <2 x float>, align 8
+// CHECK-NEXT: store i64 [[A1]], ptr [[A1_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A1_ADDR]], align 8
+// CHECK-NEXT: store i64 [[TMP0]], ptr [[__PROMOTE]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__PROMOTE]], align 8
+// CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[__RET]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[__RET]], align 8
+// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[TMP]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[TMP]], align 8
+// CHECK-NEXT: ret <2 x float> [[TMP3]]
+//
float32x2_t test_vcreate_f32(uint64_t a1) {
- // CHECK: test_vcreate_f32
return vcreate_f32(a1);
- // CHECK: bitcast {{.*}} to <2 x float>
- // CHECK-NEXT: ret
}
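
The regenerated vcreate_f32 checks spell out what the intrinsic has always meant: the 64-bit integer is stored and reloaded through __promote, i.e. reinterpreted bit-for-bit as <2 x float>, with no numeric conversion. An equivalent scalar sketch (hypothetical helper, little-endian lane order assumed):

    #include <stdint.h>
    #include <string.h>

    /* Bit-level model of vcreate_f32: the u64 pattern becomes two IEEE-754
       single-precision lanes; memcpy plays the role of the store/load
       through __promote in the checks above. */
    typedef struct { float lane[2]; } f32x2_bits;

    static f32x2_bits create_f32_bits(uint64_t a) {
      f32x2_bits r;
      memcpy(&r, &a, sizeof r);
      return r;
    }
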
diff --git a/clang/test/CodeGen/arm64_vdupq_n_f64.c b/clang/test/CodeGen/arm64_vdupq_n_f64.c
index 2da2d3bc8d075..5159382ba0820 100644
--- a/clang/test/CodeGen/arm64_vdupq_n_f64.c
+++ b/clang/test/CodeGen/arm64_vdupq_n_f64.c
@@ -1,57 +1,74 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -o - -disable-O0-optnone -emit-llvm %s | opt -S -passes=mem2reg | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -o - -disable-O0-optnone -emit-llvm %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
// vdupq_n_f64 -> dup.2d v0, v0[0]
+// CHECK-LABEL: define <2 x double> @test_vdupq_n_f64(
+// CHECK-SAME: double noundef [[W:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[W]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[W]], i32 1
+// CHECK-NEXT: ret <2 x double> [[VECINIT1_I]]
//
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vdupq_n_f64(double noundef %w) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double %w, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %w, i32 1
-// CHECK: ret <2 x double> [[VECINIT1_I]]
float64x2_t test_vdupq_n_f64(float64_t w) {
return vdupq_n_f64(w);
}
// might as well test this while we're here
// vdupq_n_f32 -> dup.4s v0, v0[0]
-// CHECK-LABEL: define{{.*}} <4 x float> @test_vdupq_n_f32(float noundef %w) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %w, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %w, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %w, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %w, i32 3
-// CHECK: ret <4 x float> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(
+// CHECK-SAME: float noundef [[W:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[W]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[W]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[W]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[W]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VECINIT3_I]]
+//
float32x4_t test_vdupq_n_f32(float32_t w) {
return vdupq_n_f32(w);
}
// vdupq_lane_f64 -> dup.2d v0, v0[0]
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vdupq_lane_f64(<1 x double> noundef %V) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %V to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x double> [[SHUFFLE]]
+// CHECK-LABEL: define <2 x double> @test_vdupq_lane_f64(
+// CHECK-SAME: <1 x double> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to i64
+// CHECK-NEXT: [[__S0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__S0_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP2]], <1 x double> [[TMP2]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x double> [[LANE]]
+//
float64x2_t test_vdupq_lane_f64(float64x1_t V) {
return vdupq_lane_f64(V, 0);
}
// vmovq_n_f64 -> dup Vd.2d,X0
-// CHECK-LABEL: define{{.*}} <2 x double> @test_vmovq_n_f64(double noundef %w) #0 {
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double %w, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %w, i32 1
-// CHECK: ret <2 x double> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x double> @test_vmovq_n_f64(
+// CHECK-SAME: double noundef [[W:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[W]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[W]], i32 1
+// CHECK-NEXT: ret <2 x double> [[VECINIT1_I]]
+//
float64x2_t test_vmovq_n_f64(float64_t w) {
return vmovq_n_f64(w);
}
-// CHECK-LABEL: define{{.*}} <4 x half> @test_vmov_n_f16(ptr noundef %a1) #0 {
-// CHECK: [[TMP0:%.*]] = load half, ptr %a1, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: ret <4 x half> [[VECINIT3]]
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(
+// CHECK-SAME: ptr noundef [[A1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A1]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: ret <4 x half> [[VECINIT3]]
+//
float16x4_t test_vmov_n_f16(float16_t *a1) {
return vmov_n_f16(*a1);
}
@@ -62,17 +79,20 @@ float64x1_t test_vmov_n_f64(float64_t a1) {
}
*/
-// CHECK-LABEL: define{{.*}} <8 x half> @test_vmovq_n_f16(ptr noundef %a1) #0 {
-// CHECK: [[TMP0:%.*]] = load half, ptr %a1, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
-// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
-// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
-// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
-// CHECK: ret <8 x half> [[VECINIT7]]
+// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(
+// CHECK-SAME: ptr noundef [[A1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A1]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
+// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
+// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x half> [[VECINIT7]]
+//
float16x8_t test_vmovq_n_f16(float16_t *a1) {
return vmovq_n_f16(*a1);
}
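
Two splat shapes show up in this file: vdupq_n_* builds the vector lane by lane with insertelement, while vdupq_lane_f64 round-trips through bitcasts and a zero-mask shufflevector. Both forms should reach the same dup.2d on AArch64 (an expectation, not something these IR checks verify); a short sketch of the equivalence:

    #include <arm_neon.h>

    /* shufflevector form: duplicate lane 0 of the 64-bit input. */
    float64x2_t splat_from_lane(float64x1_t v) {
      return vdupq_lane_f64(v, 0);
    }

    /* insertelement-chain form: extract the scalar, then splat it.
       Expected to match splat_from_lane bit-for-bit (assumption). */
    float64x2_t splat_from_scalar(float64x1_t v) {
      return vdupq_n_f64(vget_lane_f64(v, 0));
    }
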
diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c
index 9f43dd2be5af5..eb9fe126ff2a0 100644
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
@@ -1,20011 +1,27310 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN: -target-cpu swift \
// RUN: -target-feature +fullfp16 -ffreestanding \
// RUN: -flax-vector-conversions=none \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
-// RUN: | opt -S -passes=mem2reg | FileCheck %s
+// RUN: | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vaba_s8(
-// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vaba_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[VABD_V_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vaba_s8(a, b, c);
}
-// CHECK-LABEL: @test_vaba_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vaba_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vaba_s16(a, b, c);
}
-// CHECK-LABEL: @test_vaba_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vaba_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vaba_s32(a, b, c);
}
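
All the vaba checks in this file follow one shape: a vabd{s,u} call, bitcasts through the byte vector type for non-i8 element widths, then a plain add back into the accumulator. A per-lane scalar model of the signed byte case (illustration only; narrowing back to int8_t assumes the usual two's-complement behavior):

    #include <stdint.h>

    /* One lane of vaba_s8: a + |b - c|, with the add wrapping modulo 2^8
       exactly like the vector add in the checks above. */
    static int8_t vaba_lane_s8(int8_t a, int8_t b, int8_t c) {
      int diff = (int)b - (int)c;
      unsigned absd = (unsigned)(diff < 0 ? -diff : diff); /* fits in 8 bits */
      return (int8_t)(uint8_t)((uint8_t)a + absd);
    }
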
-// CHECK-LABEL: @test_vaba_u8(
-// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vaba_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[VABD_V_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vaba_u8(a, b, c);
}
-// CHECK-LABEL: @test_vaba_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vaba_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vaba_u16(a, b, c);
}
-// CHECK-LABEL: @test_vaba_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vaba_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vaba_u32(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_s8(
-// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> [[B]], <16 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[VABDQ_V_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vabaq_s8(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
-// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vabaq_s16(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
-// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vabaq_s32(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_u8(
-// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> [[B]], <16 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[VABDQ_V_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vabaq_u8(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
-// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vabaq_u16(a, b, c);
}
-// CHECK-LABEL: @test_vabaq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
-// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vabaq_u32(a, b, c);
}
-// CHECK-LABEL: @test_vabal_s8(
-// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabal_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vabal_s8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vabal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vabal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vabal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u8(
-// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabal_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vabal_u8(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabal_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vabal_u16(a, b, c);
}
-// CHECK-LABEL: @test_vabal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vabal_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vabal_u32(a, b, c);
}
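All of the vabal checks above reduce to the same three-step pattern: an absolute-difference intrinsic, a zero-extension, and a widening accumulate. A minimal scalar sketch of one unsigned lane (the helper name is hypothetical, not part of arm_neon.h):

#include <stdint.h>

/* One lane of vabal_u8: acc + zext(|b - c|), mirroring the
   vabd -> zext -> add sequence in the CHECK lines above. */
static inline uint16_t vabal_u8_lane(uint16_t acc, uint8_t b, uint8_t c) {
    uint8_t abd = b > c ? (uint8_t)(b - c) : (uint8_t)(c - b); /* vabd */
    return (uint16_t)(acc + abd);              /* widen, then accumulate */
}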
-// CHECK-LABEL: @test_vabd_s8(
-// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VABD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vabd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VABD_V_I]]
+//
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
return vabd_s8(a, b);
}
-// CHECK-LABEL: @test_vabd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VABD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vabd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
return vabd_s16(a, b);
}
-// CHECK-LABEL: @test_vabd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VABD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vabd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
return vabd_s32(a, b);
}
-// CHECK-LABEL: @test_vabd_u8(
-// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VABD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vabd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VABD_V_I]]
+//
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
return vabd_u8(a, b);
}
-// CHECK-LABEL: @test_vabd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VABD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vabd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
return vabd_u16(a, b);
}
-// CHECK-LABEL: @test_vabd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VABD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vabd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
return vabd_u32(a, b);
}
-// CHECK-LABEL: @test_vabd_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VABD_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vabd_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> [[VABD_V_I]], <2 x float> [[VABD_V1_I]])
+// CHECK-NEXT: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
return vabd_f32(a, b);
}
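The floating-point variant is where the new codegen differs most visibly: vabd_f32 now shuttles its operands through <2 x i32> and <8 x i8> bitcasts before the intrinsic call. A bitcast only reinterprets bits, so the extra casts are value-preserving. A sketch of the float <-> i32 round trip in C, using memcpy as the portable spelling of a bitcast:

#include <stdint.h>
#include <string.h>

/* A bitcast round trip is the identity on the bit pattern, so the
   float -> <2 x i32> -> <8 x i8> -> float chain above cannot alter a lane. */
static inline float bitcast_roundtrip(float f) {
    uint32_t bits;
    memcpy(&bits, &f, sizeof bits);  /* bitcast float to i32 */
    float out;
    memcpy(&out, &bits, sizeof out); /* bitcast i32 back to float */
    return out;
}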
-// CHECK-LABEL: @test_vabdq_s8(
-// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VABDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VABDQ_V_I]]
+//
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
return vabdq_s8(a, b);
}
-// CHECK-LABEL: @test_vabdq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VABDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
return vabdq_s16(a, b);
}
-// CHECK-LABEL: @test_vabdq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VABDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
return vabdq_s32(a, b);
}
-// CHECK-LABEL: @test_vabdq_u8(
-// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VABDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VABDQ_V_I]]
+//
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
return vabdq_u8(a, b);
}
-// CHECK-LABEL: @test_vabdq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VABDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
return vabdq_u16(a, b);
}
-// CHECK-LABEL: @test_vabdq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VABDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
return vabdq_u32(a, b);
}
-// CHECK-LABEL: @test_vabdq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VABDQ_V2_I]]
+// CHECK-LABEL: define <4 x float> @test_vabdq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> [[VABDQ_V_I]], <4 x float> [[VABDQ_V1_I]])
+// CHECK-NEXT: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
return vabdq_f32(a, b);
}
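The q-suffixed forms are the same operations on full 128-bit registers, i.e. twice as many lanes. Purely as an illustration (not how arm_neon.h is implemented), the whole of vabdq_u8 can be modelled as a lane loop:

#include <stdint.h>

/* Scalar model of vabdq_u8: sixteen independent |a - b| lanes. */
static void vabdq_u8_model(const uint8_t a[16], const uint8_t b[16],
                           uint8_t out[16]) {
    for (int i = 0; i < 16; ++i)
        out[i] = a[i] > b[i] ? (uint8_t)(a[i] - b[i])
                             : (uint8_t)(b[i] - a[i]);
}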
-// CHECK-LABEL: @test_vabdl_s8(
-// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
return vabdl_s8(a, b);
}
-// CHECK-LABEL: @test_vabdl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
return vabdl_s16(a, b);
}
-// CHECK-LABEL: @test_vabdl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I]]
+// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
return vabdl_s32(a, b);
}
-// CHECK-LABEL: @test_vabdl_u8(
-// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
return vabdl_u8(a, b);
}
-// CHECK-LABEL: @test_vabdl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
return vabdl_u16(a, b);
}
-// CHECK-LABEL: @test_vabdl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I_I]]
+// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]])
+// CHECK-NEXT: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
return vabdl_u32(a, b);
}
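One detail worth noting in the vabdl checks: the widening step is a zext even for the signed variants, because an absolute difference is non-negative and the sabd result is only meaningful as an unsigned bit pattern. A sketch of one signed lane (hypothetical helper name):

#include <stdint.h>

/* One lane of vabdl_s16: the abs diff of two int16 values can be as
   large as 65535, so it lives in the lane as uint16 and is
   zero-extended, matching the [[VMOVL_I]] zext above. */
static inline int32_t vabdl_s16_lane(int16_t a, int16_t b) {
    int32_t d = (int32_t)a - (int32_t)b;
    uint16_t abd = (uint16_t)(d < 0 ? -d : d); /* what sabd leaves in the lane */
    return (int32_t)abd;                       /* zext to 32 bits */
}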
-// CHECK-LABEL: @test_vabs_s8(
-// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VABS_I]]
+// CHECK-LABEL: define <8 x i8> @test_vabs_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VABS_I]]
+//
int8x8_t test_vabs_s8(int8x8_t a) {
return vabs_s8(a);
}
-// CHECK-LABEL: @test_vabs_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
-// CHECK: ret <4 x i16> [[VABS1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vabs_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> [[VABS_I]])
+// CHECK-NEXT: ret <4 x i16> [[VABS1_I]]
+//
int16x4_t test_vabs_s16(int16x4_t a) {
return vabs_s16(a);
}
-// CHECK-LABEL: @test_vabs_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VABS1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vabs_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> [[VABS_I]])
+// CHECK-NEXT: ret <2 x i32> [[VABS1_I]]
+//
int32x2_t test_vabs_s32(int32x2_t a) {
return vabs_s32(a);
}
-// CHECK-LABEL: @test_vabs_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VABS1_I]]
+// CHECK-LABEL: define <2 x float> @test_vabs_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]])
+// CHECK-NEXT: ret <2 x float> [[VABS1_I]]
+//
float32x2_t test_vabs_f32(float32x2_t a) {
return vabs_f32(a);
}
-// CHECK-LABEL: @test_vabsq_s8(
-// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VABS_I]]
+// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VABS_I]]
+//
int8x16_t test_vabsq_s8(int8x16_t a) {
return vabsq_s8(a);
}
-// CHECK-LABEL: @test_vabsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
-// CHECK: ret <8 x i16> [[VABS1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> [[VABS_I]])
+// CHECK-NEXT: ret <8 x i16> [[VABS1_I]]
+//
int16x8_t test_vabsq_s16(int16x8_t a) {
return vabsq_s16(a);
}
-// CHECK-LABEL: @test_vabsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VABS1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> [[VABS_I]])
+// CHECK-NEXT: ret <4 x i32> [[VABS1_I]]
+//
int32x4_t test_vabsq_s32(int32x4_t a) {
return vabsq_s32(a);
}
-// CHECK-LABEL: @test_vabsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VABS1_I]]
+// CHECK-LABEL: define <4 x float> @test_vabsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]])
+// CHECK-NEXT: ret <4 x float> [[VABS1_I]]
+//
float32x4_t test_vabsq_f32(float32x4_t a) {
return vabsq_f32(a);
}
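The vabs tests lower to @llvm.arm.neon.vabs for the integer types and @llvm.fabs for the float types. The integer form wraps at the minimum value rather than saturating (vqabs is the saturating variant), which a one-lane sketch makes visible:

#include <stdint.h>

/* One lane of vabs_s8: plain two's-complement negation, so
   vabs_s8(INT8_MIN) wraps back to INT8_MIN; vqabs_s8 saturates instead. */
static inline int8_t vabs_s8_lane(int8_t a) {
    uint8_t u = (uint8_t)a;
    uint8_t abs = a < 0 ? (uint8_t)(0u - u) : u; /* two's-complement negate */
    return (int8_t)abs;
}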
-// CHECK-LABEL: @test_vadd_s8(
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
return vadd_s8(a, b);
}
-// CHECK-LABEL: @test_vadd_s16(
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
return vadd_s16(a, b);
}
-// CHECK-LABEL: @test_vadd_s32(
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
return vadd_s32(a, b);
}
-// CHECK-LABEL: @test_vadd_s64(
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vadd_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
return vadd_s64(a, b);
}
-// CHECK-LABEL: @test_vadd_f32(
-// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
-// CHECK: ret <2 x float> [[ADD_I]]
+// CHECK-LABEL: define <2 x float> @test_vadd_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
return vadd_f32(a, b);
}
-// CHECK-LABEL: @test_vadd_u8(
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
return vadd_u8(a, b);
}
-// CHECK-LABEL: @test_vadd_u16(
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
return vadd_u16(a, b);
}
-// CHECK-LABEL: @test_vadd_u32(
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
return vadd_u32(a, b);
}
-// CHECK-LABEL: @test_vadd_u64(
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vadd_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
return vadd_u64(a, b);
}
-// CHECK-LABEL: @test_vaddq_s8(
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
return vaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vaddq_s16(
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
return vaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vaddq_s32(
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
return vaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vaddq_s64(
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
return vaddq_s64(a, b);
}
-// CHECK-LABEL: @test_vaddq_f32(
-// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
-// CHECK: ret <4 x float> [[ADD_I]]
+// CHECK-LABEL: define <4 x float> @test_vaddq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
return vaddq_f32(a, b);
}
-// CHECK-LABEL: @test_vaddq_u8(
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
return vaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vaddq_u16(
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
return vaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vaddq_u32(
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
return vaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vaddq_u64(
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
return vaddq_u64(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VADDHN2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VADDHN2_I]]
+//
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
return vaddhn_s16(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VADDHN2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VADDHN2_I]]
+//
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
return vaddhn_s32(a, b);
}
-// CHECK-LABEL: @test_vaddhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VADDHN2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VADDHN2_I]]
+//
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
return vaddhn_s64(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VADDHN2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VADDHN2_I]]
+//
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
return vaddhn_u16(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VADDHN2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VADDHN2_I]]
+//
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
return vaddhn_u32(a, b);
}
-// CHECK-LABEL: @test_vaddhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
-// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
-// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VADDHN2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VADDHN2_I]]
+//
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
return vaddhn_u64(a, b);
}
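vaddhn ("add, returning the high half, narrowing") is the clearest of these patterns: add, logical shift right by half the element width, truncate. One lane in scalar C (hypothetical helper name):

#include <stdint.h>

/* One lane of vaddhn_s16: the high byte of the wrapping 16-bit sum,
   mirroring the add -> lshr 8 -> trunc sequence in the IR above. */
static inline int8_t vaddhn_s16_lane(int16_t a, int16_t b) {
    uint16_t sum = (uint16_t)((uint16_t)a + (uint16_t)b); /* wrapping add */
    return (int8_t)(sum >> 8);                            /* keep bits [15:8] */
}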
-// CHECK-LABEL: @test_vaddl_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
return vaddl_s8(a, b);
}
-// CHECK-LABEL: @test_vaddl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
return vaddl_s16(a, b);
}
-// CHECK-LABEL: @test_vaddl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
return vaddl_s32(a, b);
}
-// CHECK-LABEL: @test_vaddl_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
return vaddl_u8(a, b);
}
-// CHECK-LABEL: @test_vaddl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
return vaddl_u16(a, b);
}
-// CHECK-LABEL: @test_vaddl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I6:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I6]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
return vaddl_u32(a, b);
}
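vaddl widens both operands before adding (sext for the signed forms, zext for the unsigned); the vaddw variants that follow widen only the second operand and add it to an already-wide accumulator. One signed lane of each, as a sketch with illustrative helper names:

#include <stdint.h>

/* One lane of vaddl_s16: sign-extend both inputs, then add in 32 bits
   (the 32-bit sum of two int16 values cannot overflow). */
static inline int32_t vaddl_s16_lane(int16_t a, int16_t b) {
    return (int32_t)a + (int32_t)b;
}

/* One lane of vaddw_s16: only b is widened; a is already 32 bits.
   Done in uint32_t so the sum wraps, like the vector add. */
static inline int32_t vaddw_s16_lane(int32_t a, int16_t b) {
    return (int32_t)((uint32_t)a + (uint32_t)(int32_t)b);
}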
-// CHECK-LABEL: @test_vaddw_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
return vaddw_s8(a, b);
}
-// CHECK-LABEL: @test_vaddw_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
return vaddw_s16(a, b);
}
-// CHECK-LABEL: @test_vaddw_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
return vaddw_s32(a, b);
}
-// CHECK-LABEL: @test_vaddw_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
return vaddw_u8(a, b);
}
-// CHECK-LABEL: @test_vaddw_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
return vaddw_u16(a, b);
}
-// CHECK-LABEL: @test_vaddw_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
return vaddw_u32(a, b);
}
-// CHECK-LABEL: @test_vand_s8(
-// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[AND_I]]
+// CHECK-LABEL: define <8 x i8> @test_vand_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[AND_I]]
+//
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
return vand_s8(a, b);
}
-// CHECK-LABEL: @test_vand_s16(
-// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[AND_I]]
+// CHECK-LABEL: define <4 x i16> @test_vand_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[AND_I]]
+//
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
return vand_s16(a, b);
}
-// CHECK-LABEL: @test_vand_s32(
-// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[AND_I]]
+// CHECK-LABEL: define <2 x i32> @test_vand_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[AND_I]]
+//
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
return vand_s32(a, b);
}
-// CHECK-LABEL: @test_vand_s64(
-// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[AND_I]]
+// CHECK-LABEL: define <1 x i64> @test_vand_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[AND_I]]
+//
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
return vand_s64(a, b);
}
-// CHECK-LABEL: @test_vand_u8(
-// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[AND_I]]
+// CHECK-LABEL: define <8 x i8> @test_vand_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[AND_I]]
+//
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
return vand_u8(a, b);
}
-// CHECK-LABEL: @test_vand_u16(
-// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[AND_I]]
+// CHECK-LABEL: define <4 x i16> @test_vand_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[AND_I]]
+//
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
return vand_u16(a, b);
}
-// CHECK-LABEL: @test_vand_u32(
-// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[AND_I]]
+// CHECK-LABEL: define <2 x i32> @test_vand_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[AND_I]]
+//
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
return vand_u32(a, b);
}
-// CHECK-LABEL: @test_vand_u64(
-// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[AND_I]]
+// CHECK-LABEL: define <1 x i64> @test_vand_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[AND_I]]
+//
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
return vand_u64(a, b);
}
-// CHECK-LABEL: @test_vandq_s8(
-// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[AND_I]]
+// CHECK-LABEL: define <16 x i8> @test_vandq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[AND_I]]
+//
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
return vandq_s8(a, b);
}
-// CHECK-LABEL: @test_vandq_s16(
-// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[AND_I]]
+// CHECK-LABEL: define <8 x i16> @test_vandq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[AND_I]]
+//
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
return vandq_s16(a, b);
}
-// CHECK-LABEL: @test_vandq_s32(
-// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[AND_I]]
+// CHECK-LABEL: define <4 x i32> @test_vandq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[AND_I]]
+//
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
return vandq_s32(a, b);
}
-// CHECK-LABEL: @test_vandq_s64(
-// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[AND_I]]
+// CHECK-LABEL: define <2 x i64> @test_vandq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[AND_I]]
+//
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
return vandq_s64(a, b);
}
-// CHECK-LABEL: @test_vandq_u8(
-// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[AND_I]]
+// CHECK-LABEL: define <16 x i8> @test_vandq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[AND_I]]
+//
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
return vandq_u8(a, b);
}
-// CHECK-LABEL: @test_vandq_u16(
-// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[AND_I]]
+// CHECK-LABEL: define <8 x i16> @test_vandq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[AND_I]]
+//
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
return vandq_u16(a, b);
}
-// CHECK-LABEL: @test_vandq_u32(
-// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[AND_I]]
+// CHECK-LABEL: define <4 x i32> @test_vandq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[AND_I]]
+//
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
return vandq_u32(a, b);
}
-// CHECK-LABEL: @test_vandq_u64(
-// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[AND_I]]
+// CHECK-LABEL: define <2 x i64> @test_vandq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[AND_I]]
+//
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
return vandq_u64(a, b);
}
-// CHECK-LABEL: @test_vbic_s8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
-// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
-// CHECK: ret <8 x i8> [[AND_I]]
+// CHECK-LABEL: define <8 x i8> @test_vbic_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i8> [[AND_I]]
+//
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
return vbic_s8(a, b);
}
-// CHECK-LABEL: @test_vbic_s16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
-// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
-// CHECK: ret <4 x i16> [[AND_I]]
+// CHECK-LABEL: define <4 x i16> @test_vbic_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i16> [[AND_I]]
+//
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
return vbic_s16(a, b);
}
-// CHECK-LABEL: @test_vbic_s32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
-// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
-// CHECK: ret <2 x i32> [[AND_I]]
+// CHECK-LABEL: define <2 x i32> @test_vbic_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i32> [[AND_I]]
+//
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
return vbic_s32(a, b);
}
-// CHECK-LABEL: @test_vbic_s64(
-// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
-// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
-// CHECK: ret <1 x i64> [[AND_I]]
+// CHECK-LABEL: define <1 x i64> @test_vbic_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <1 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <1 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <1 x i64> [[AND_I]]
+//
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
return vbic_s64(a, b);
}
-// CHECK-LABEL: @test_vbic_u8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
-// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
-// CHECK: ret <8 x i8> [[AND_I]]
+// CHECK-LABEL: define <8 x i8> @test_vbic_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i8> [[AND_I]]
+//
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
return vbic_u8(a, b);
}
-// CHECK-LABEL: @test_vbic_u16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
-// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
-// CHECK: ret <4 x i16> [[AND_I]]
+// CHECK-LABEL: define <4 x i16> @test_vbic_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i16> [[AND_I]]
+//
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
return vbic_u16(a, b);
}
-// CHECK-LABEL: @test_vbic_u32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
-// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
-// CHECK: ret <2 x i32> [[AND_I]]
+// CHECK-LABEL: define <2 x i32> @test_vbic_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i32> [[AND_I]]
+//
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
return vbic_u32(a, b);
}
-// CHECK-LABEL: @test_vbic_u64(
-// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
-// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
-// CHECK: ret <1 x i64> [[AND_I]]
+// CHECK-LABEL: define <1 x i64> @test_vbic_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <1 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <1 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <1 x i64> [[AND_I]]
+//
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
return vbic_u64(a, b);
}
-// CHECK-LABEL: @test_vbicq_s8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
-// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
-// CHECK: ret <16 x i8> [[AND_I]]
+// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <16 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <16 x i8> [[AND_I]]
+//
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
return vbicq_s8(a, b);
}
-// CHECK-LABEL: @test_vbicq_s16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
-// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
-// CHECK: ret <8 x i16> [[AND_I]]
+// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i16> [[AND_I]]
+//
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
return vbicq_s16(a, b);
}
-// CHECK-LABEL: @test_vbicq_s32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
-// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
-// CHECK: ret <4 x i32> [[AND_I]]
+// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i32> [[AND_I]]
+//
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
return vbicq_s32(a, b);
}
-// CHECK-LABEL: @test_vbicq_s64(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
-// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
-// CHECK: ret <2 x i64> [[AND_I]]
+// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i64> [[AND_I]]
+//
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
return vbicq_s64(a, b);
}
-// CHECK-LABEL: @test_vbicq_u8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
-// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
-// CHECK: ret <16 x i8> [[AND_I]]
+// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <16 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <16 x i8> [[AND_I]]
+//
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
return vbicq_u8(a, b);
}
-// CHECK-LABEL: @test_vbicq_u16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
-// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
-// CHECK: ret <8 x i16> [[AND_I]]
+// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i16> [[AND_I]]
+//
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
return vbicq_u16(a, b);
}
-// CHECK-LABEL: @test_vbicq_u32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
-// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
-// CHECK: ret <4 x i32> [[AND_I]]
+// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i32> [[AND_I]]
+//
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
return vbicq_u32(a, b);
}
-// CHECK-LABEL: @test_vbicq_u64(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
-// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
-// CHECK: ret <2 x i64> [[AND_I]]
+// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i64> [[AND_I]]
+//
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
return vbicq_u64(a, b);
}
-// CHECK-LABEL: @test_vbsl_s8(
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VBSL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VBSL_V_I]]
+//
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
return vbsl_s8(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP3]]
+// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP3]]
+//
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
return vbsl_s16(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP3]]
+// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
return vbsl_s32(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP3]]
+// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]], <1 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
return vbsl_s64(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_u8(
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VBSL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VBSL_V_I]]
+//
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vbsl_u8(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP3]]
+// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP3]]
+//
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vbsl_u16(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP3]]
+// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP3]]
+//
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vbsl_u32(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP3]]
+// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]], <1 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP3]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
return vbsl_u64(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
-// CHECK: ret <2 x float> [[TMP3]]
+// CHECK-LABEL: define <2 x float> @test_vbsl_f32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]])
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP6]]
+//
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
return vbsl_f32(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_p8(
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VBSL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VBSL_V_I]]
+//
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
return vbsl_p8(a, b, c);
}
-// CHECK-LABEL: @test_vbsl_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP3]]
+// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP3]]
+//
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
return vbsl_p16(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_s8(
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+// CHECK-NEXT: ret <16 x i8> [[VBSLQ_V_I]]
+//
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
return vbslq_s8(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP3]]
+// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+//
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
return vbslq_s16(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
return vbslq_s32(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP3]]
+// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]], <2 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
return vbslq_s64(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_u8(
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+// CHECK-NEXT: ret <16 x i8> [[VBSLQ_V_I]]
+//
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vbslq_u8(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP3]]
+// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+//
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vbslq_u16(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP3]]
+// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+//
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vbslq_u32(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP3]]
+// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]], <2 x i64> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP3]]
+//
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
return vbslq_u64(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
-// CHECK: ret <4 x float> [[TMP3]]
+// CHECK-LABEL: define <4 x float> @test_vbslq_f32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[C]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]])
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP6]]
+//
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
return vbslq_f32(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_p8(
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+// CHECK-NEXT: ret <16 x i8> [[VBSLQ_V_I]]
+//
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
return vbslq_p8(a, b, c);
}
-// CHECK-LABEL: @test_vbslq_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP3]]
+// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <16 x i8>
+// CHECK-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+//
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
return vbslq_p16(a, b, c);
}
-// CHECK-LABEL: @test_vcage_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcage_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCAGE_V2_I]]
+//
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
return vcage_f32(a, b);
}
-// CHECK-LABEL: @test_vcageq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCAGEQ_V2_I]]
+//
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
return vcageq_f32(a, b);
}
-// CHECK-LABEL: @test_vcagt_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCAGT_V2_I]]
+//
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
return vcagt_f32(a, b);
}
-// CHECK-LABEL: @test_vcagtq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCAGTQ_V2_I]]
+//
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
return vcagtq_f32(a, b);
}
-// CHECK-LABEL: @test_vcale_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
-// CHECK: ret <2 x i32> [[VCALE_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcale_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCALE_V2_I]]
+//
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
return vcale_f32(a, b);
}
-// CHECK-LABEL: @test_vcaleq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
-// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCALEQ_V2_I]]
+//
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
return vcaleq_f32(a, b);
}
-// CHECK-LABEL: @test_vcalt_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
-// CHECK: ret <2 x i32> [[VCALT_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VCALT_V2_I]]
+//
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
return vcalt_f32(a, b);
}
-// CHECK-LABEL: @test_vcaltq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
-// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VCALTQ_V2_I]]
+//
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
return vcaltq_f32(a, b);
}
-// CHECK-LABEL: @test_vceq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vceq_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
return vceq_s8(a, b);
}
-// CHECK-LABEL: @test_vceq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vceq_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
return vceq_s16(a, b);
}
-// CHECK-LABEL: @test_vceq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vceq_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
return vceq_s32(a, b);
}
-// CHECK-LABEL: @test_vceq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vceq_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
return vceq_f32(a, b);
}
-// CHECK-LABEL: @test_vceq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vceq_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
return vceq_u8(a, b);
}
-// CHECK-LABEL: @test_vceq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vceq_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
return vceq_u16(a, b);
}
-// CHECK-LABEL: @test_vceq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vceq_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
return vceq_u32(a, b);
}
-// CHECK-LABEL: @test_vceq_p8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vceq_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
return vceq_p8(a, b);
}
-// CHECK-LABEL: @test_vceqq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
return vceqq_s8(a, b);
}
-// CHECK-LABEL: @test_vceqq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
return vceqq_s16(a, b);
}
-// CHECK-LABEL: @test_vceqq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
return vceqq_s32(a, b);
}
-// CHECK-LABEL: @test_vceqq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
return vceqq_f32(a, b);
}
-// CHECK-LABEL: @test_vceqq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
return vceqq_u8(a, b);
}
-// CHECK-LABEL: @test_vceqq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
return vceqq_u16(a, b);
}
-// CHECK-LABEL: @test_vceqq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
return vceqq_u32(a, b);
}
-// CHECK-LABEL: @test_vceqq_p8(
-// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
return vceqq_p8(a, b);
}
-// CHECK-LABEL: @test_vcge_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcge_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
return vcge_s8(a, b);
}
-// CHECK-LABEL: @test_vcge_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcge_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
return vcge_s16(a, b);
}
-// CHECK-LABEL: @test_vcge_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcge_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
return vcge_s32(a, b);
}
-// CHECK-LABEL: @test_vcge_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcge_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
return vcge_f32(a, b);
}
-// CHECK-LABEL: @test_vcge_u8(
-// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcge_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
return vcge_u8(a, b);
}
-// CHECK-LABEL: @test_vcge_u16(
-// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcge_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
return vcge_u16(a, b);
}
-// CHECK-LABEL: @test_vcge_u32(
-// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcge_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
return vcge_u32(a, b);
}
-// CHECK-LABEL: @test_vcgeq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
return vcgeq_s8(a, b);
}
-// CHECK-LABEL: @test_vcgeq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
return vcgeq_s16(a, b);
}
-// CHECK-LABEL: @test_vcgeq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
return vcgeq_s32(a, b);
}
-// CHECK-LABEL: @test_vcgeq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
return vcgeq_f32(a, b);
}
-// CHECK-LABEL: @test_vcgeq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
return vcgeq_u8(a, b);
}
-// CHECK-LABEL: @test_vcgeq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
return vcgeq_u16(a, b);
}
-// CHECK-LABEL: @test_vcgeq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
return vcgeq_u32(a, b);
}
-// CHECK-LABEL: @test_vcgt_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
return vcgt_s8(a, b);
}
-// CHECK-LABEL: @test_vcgt_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
return vcgt_s16(a, b);
}
-// CHECK-LABEL: @test_vcgt_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
return vcgt_s32(a, b);
}
-// CHECK-LABEL: @test_vcgt_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
return vcgt_f32(a, b);
}
-// CHECK-LABEL: @test_vcgt_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
return vcgt_u8(a, b);
}
-// CHECK-LABEL: @test_vcgt_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
return vcgt_u16(a, b);
}
-// CHECK-LABEL: @test_vcgt_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
return vcgt_u32(a, b);
}
-// CHECK-LABEL: @test_vcgtq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
return vcgtq_s8(a, b);
}
-// CHECK-LABEL: @test_vcgtq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
return vcgtq_s16(a, b);
}
-// CHECK-LABEL: @test_vcgtq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
return vcgtq_s32(a, b);
}
-// CHECK-LABEL: @test_vcgtq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
return vcgtq_f32(a, b);
}
-// CHECK-LABEL: @test_vcgtq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
return vcgtq_u8(a, b);
}
-// CHECK-LABEL: @test_vcgtq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
return vcgtq_u16(a, b);
}
-// CHECK-LABEL: @test_vcgtq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
return vcgtq_u32(a, b);
}
-// CHECK-LABEL: @test_vcle_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcle_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
return vcle_s8(a, b);
}
-// CHECK-LABEL: @test_vcle_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcle_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
return vcle_s16(a, b);
}
-// CHECK-LABEL: @test_vcle_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcle_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
return vcle_s32(a, b);
}
-// CHECK-LABEL: @test_vcle_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcle_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
return vcle_f32(a, b);
}
-// CHECK-LABEL: @test_vcle_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcle_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
return vcle_u8(a, b);
}
-// CHECK-LABEL: @test_vcle_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcle_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
return vcle_u16(a, b);
}
-// CHECK-LABEL: @test_vcle_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcle_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
return vcle_u32(a, b);
}
-// CHECK-LABEL: @test_vcleq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
return vcleq_s8(a, b);
}
-// CHECK-LABEL: @test_vcleq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
return vcleq_s16(a, b);
}
-// CHECK-LABEL: @test_vcleq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
return vcleq_s32(a, b);
}
-// CHECK-LABEL: @test_vcleq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
return vcleq_f32(a, b);
}
-// CHECK-LABEL: @test_vcleq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
return vcleq_u8(a, b);
}
-// CHECK-LABEL: @test_vcleq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
return vcleq_u16(a, b);
}
-// CHECK-LABEL: @test_vcleq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
return vcleq_u32(a, b);
}
-// CHECK-LABEL: @test_vcls_s8(
-// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCLS_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcls_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCLS_V_I]]
+//
int8x8_t test_vcls_s8(int8x8_t a) {
return vcls_s8(a);
}
-// CHECK-LABEL: @test_vcls_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLS_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcls_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vcls_s16(int16x4_t a) {
return vcls_s16(a);
}
-// CHECK-LABEL: @test_vcls_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLS_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcls_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vcls_s32(int32x2_t a) {
return vcls_s32(a);
}
-// CHECK-LABEL: @test_vcls_u8(
-// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCLS_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcls_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCLS_V_I]]
+//
int8x8_t test_vcls_u8(uint8x8_t a) {
return vcls_u8(a);
}
-// CHECK-LABEL: @test_vcls_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLS_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcls_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vcls_u16(uint16x4_t a) {
return vcls_u16(a);
}
-// CHECK-LABEL: @test_vcls_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
-// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLS_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcls_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]])
+// CHECK-NEXT: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vcls_u32(uint32x2_t a) {
return vcls_u32(a);
}
-// CHECK-LABEL: @test_vclsq_s8(
-// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCLSQ_V_I]]
+//
int8x16_t test_vclsq_s8(int8x16_t a) {
return vclsq_s8(a);
}
-// CHECK-LABEL: @test_vclsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclsq_s16(int16x8_t a) {
return vclsq_s16(a);
}
-// CHECK-LABEL: @test_vclsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclsq_s32(int32x4_t a) {
return vclsq_s32(a);
}
-// CHECK-LABEL: @test_vclsq_u8(
-// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vclsq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCLSQ_V_I]]
+//
int8x16_t test_vclsq_u8(uint8x16_t a) {
return vclsq_u8(a);
}
-// CHECK-LABEL: @test_vclsq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vclsq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclsq_u16(uint16x8_t a) {
return vclsq_u16(a);
}
-// CHECK-LABEL: @test_vclsq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
-// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vclsq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]])
+// CHECK-NEXT: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclsq_u32(uint32x4_t a) {
return vclsq_u32(a);
}
-// CHECK-LABEL: @test_vclt_s8(
-// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vclt_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
return vclt_s8(a, b);
}
-// CHECK-LABEL: @test_vclt_s16(
-// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vclt_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
return vclt_s16(a, b);
}
-// CHECK-LABEL: @test_vclt_s32(
-// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vclt_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
return vclt_s32(a, b);
}
-// CHECK-LABEL: @test_vclt_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vclt_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
return vclt_f32(a, b);
}
-// CHECK-LABEL: @test_vclt_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i8> @test_vclt_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[SEXT_I]]
+//
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
return vclt_u8(a, b);
}
-// CHECK-LABEL: @test_vclt_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i16> @test_vclt_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[SEXT_I]]
+//
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
return vclt_u16(a, b);
}
-// CHECK-LABEL: @test_vclt_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vclt_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[SEXT_I]]
+//
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
return vclt_u32(a, b);
}
-// CHECK-LABEL: @test_vcltq_s8(
-// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
return vcltq_s8(a, b);
}
-// CHECK-LABEL: @test_vcltq_s16(
-// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
return vcltq_s16(a, b);
}
-// CHECK-LABEL: @test_vcltq_s32(
-// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
return vcltq_s32(a, b);
}
-// CHECK-LABEL: @test_vcltq_f32(
-// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
return vcltq_f32(a, b);
}
-// CHECK-LABEL: @test_vcltq_u8(
-// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
-// CHECK: ret <16 x i8> [[SEXT_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[SEXT_I]]
+//
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
return vcltq_u8(a, b);
}
-// CHECK-LABEL: @test_vcltq_u16(
-// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
-// CHECK: ret <8 x i16> [[SEXT_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[SEXT_I]]
+//
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
return vcltq_u16(a, b);
}
-// CHECK-LABEL: @test_vcltq_u32(
-// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
-// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
-// CHECK: ret <4 x i32> [[SEXT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
+//
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
return vcltq_u32(a, b);
}
-// CHECK-LABEL: @test_vclz_s8(
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vclz_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
int8x8_t test_vclz_s8(int8x8_t a) {
return vclz_s8(a);
}
-// CHECK-LABEL: @test_vclz_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vclz_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vclz_s16(int16x4_t a) {
return vclz_s16(a);
}
-// CHECK-LABEL: @test_vclz_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vclz_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vclz_s32(int32x2_t a) {
return vclz_s32(a);
}
-// CHECK-LABEL: @test_vclz_u8(
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vclz_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
uint8x8_t test_vclz_u8(uint8x8_t a) {
return vclz_u8(a);
}
-// CHECK-LABEL: @test_vclz_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vclz_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vclz_u16(uint16x4_t a) {
return vclz_u16(a);
}
-// CHECK-LABEL: @test_vclz_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vclz_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vclz_u32(uint32x2_t a) {
return vclz_u32(a);
}
-// CHECK-LABEL: @test_vclzq_s8(
-// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
-// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <16 x i8> [[VCLZQ_V_I]]
+//
int8x16_t test_vclzq_s8(int8x16_t a) {
return vclzq_s8(a);
}
-// CHECK-LABEL: @test_vclzq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vclzq_s16(int16x8_t a) {
return vclzq_s16(a);
}
-// CHECK-LABEL: @test_vclzq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vclzq_s32(int32x4_t a) {
return vclzq_s32(a);
}
-// CHECK-LABEL: @test_vclzq_u8(
-// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
-// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A]], i1 false)
+// CHECK-NEXT: ret <16 x i8> [[VCLZQ_V_I]]
+//
uint8x16_t test_vclzq_u8(uint8x16_t a) {
return vclzq_u8(a);
}
-// CHECK-LABEL: @test_vclzq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
uint16x8_t test_vclzq_u16(uint16x8_t a) {
return vclzq_u16(a);
}
-// CHECK-LABEL: @test_vclzq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
-// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
uint32x4_t test_vclzq_u32(uint32x4_t a) {
return vclzq_u32(a);
}
-// CHECK-LABEL: @test_vcnt_u8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
uint8x8_t test_vcnt_u8(uint8x8_t a) {
return vcnt_u8(a);
}
-// CHECK-LABEL: @test_vcnt_s8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
int8x8_t test_vcnt_s8(int8x8_t a) {
return vcnt_s8(a);
}
-// CHECK-LABEL: @test_vcnt_p8(
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
poly8x8_t test_vcnt_p8(poly8x8_t a) {
return vcnt_p8(a);
}
-// CHECK-LABEL: @test_vcntq_u8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
uint8x16_t test_vcntq_u8(uint8x16_t a) {
return vcntq_u8(a);
}
-// CHECK-LABEL: @test_vcntq_s8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
int8x16_t test_vcntq_s8(int8x16_t a) {
return vcntq_s8(a);
}
-// CHECK-LABEL: @test_vcntq_p8(
-// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VCNTQ_V_I]]
+//
poly8x16_t test_vcntq_p8(poly8x16_t a) {
return vcntq_p8(a);
}
-// CHECK-LABEL: @test_vcombine_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
return vcombine_s8(a, b);
}
-// CHECK-LABEL: @test_vcombine_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
return vcombine_s16(a, b);
}
-// CHECK-LABEL: @test_vcombine_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
return vcombine_s32(a, b);
}
-// CHECK-LABEL: @test_vcombine_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> [[A]], <1 x i64> [[B]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
return vcombine_s64(a, b);
}
-// CHECK-LABEL: @test_vcombine_f16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x half> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x half> @test_vcombine_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]]
+//
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
return vcombine_f16(a, b);
}
-// CHECK-LABEL: @test_vcombine_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x float> @test_vcombine_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
return vcombine_f32(a, b);
}
-// CHECK-LABEL: @test_vcombine_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
return vcombine_u8(a, b);
}
-// CHECK-LABEL: @test_vcombine_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
return vcombine_u16(a, b);
}
-// CHECK-LABEL: @test_vcombine_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
return vcombine_u32(a, b);
}
-// CHECK-LABEL: @test_vcombine_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> [[A]], <1 x i64> [[B]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[SHUFFLE_I]]
+//
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
return vcombine_u64(a, b);
}
-// CHECK-LABEL: @test_vcombine_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
return vcombine_p8(a, b);
}
-// CHECK-LABEL: @test_vcombine_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
return vcombine_p16(a, b);
}
-// CHECK-LABEL: @test_vcreate_s8(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
int8x8_t test_vcreate_s8(uint64_t a) {
return vclz_s8(vcreate_s8(a));
}
-// CHECK-LABEL: @test_vcreate_imm
-// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
-// CHECK: ret <4 x i16> [[RES]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_imm(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 0 to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vcreate_imm(void) {
return vcreate_s16(0);
}
-// CHECK-LABEL: @test_vcreate_s16(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vcreate_s16(uint64_t a) {
return vclz_s16(vcreate_s16(a));
}
-// CHECK-LABEL: @test_vcreate_s32(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vcreate_s32(uint64_t a) {
return vclz_s32(vcreate_s32(a));
}
-// CHECK-LABEL: @test_vcreate_f16(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vcreate_f16(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vcreate_f16(uint64_t a) {
return vcreate_f16(a);
}
-// CHECK-LABEL: @test_vcreate_f32(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vcreate_f32(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vcreate_f32(uint64_t a) {
return vcreate_f32(a);
}
-// CHECK-LABEL: @test_vcreate_u8(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
-// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
-// CHECK: ret <8 x i8> [[VCLZ_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
+// CHECK-NEXT: ret <8 x i8> [[VCLZ_V_I]]
+//
int8x8_t test_vcreate_u8(uint64_t a) {
return vclz_s8((int8x8_t)vcreate_u8(a));
}
-// CHECK-LABEL: @test_vcreate_u16(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vcreate_u16(uint64_t a) {
return vclz_s16((int16x4_t)vcreate_u16(a));
}
-// CHECK-LABEL: @test_vcreate_u32(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
-// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false)
+// CHECK-NEXT: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vcreate_u32(uint64_t a) {
return vclz_s32((int32x2_t)vcreate_u32(a));
}
-// CHECK-LABEL: @test_vcreate_u64(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vcreate_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
uint64x1_t test_vcreate_u64(uint64_t a) {
uint64x1_t tmp = vcreate_u64(a);
return vadd_u64(tmp, tmp);
}
-// CHECK-LABEL: @test_vcreate_p8(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
-// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
-// CHECK: ret <8 x i8> [[VCNT_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vcreate_p8(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
+// CHECK-NEXT: ret <8 x i8> [[VCNT_V_I]]
+//
poly8x8_t test_vcreate_p8(uint64_t a) {
return vcnt_p8(vcreate_p8(a));
}
-// CHECK-LABEL: @test_vcreate_p16(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
-// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP4]]
+// CHECK-LABEL: define <4 x i16> @test_vcreate_p16(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
poly16x4_t test_vcreate_p16(uint64_t a) {
poly16x4_t tmp = vcreate_p16(a);
return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}
-// CHECK-LABEL: @test_vcreate_s64(
-// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vcreate_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <1 x i64>
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vcreate_s64(uint64_t a) {
int64x1_t tmp = vcreate_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: @test_vcvt_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
-// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
-// CHECK: ret <4 x half> [[TMP1]]
+// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]])
+// CHECK-NEXT: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
return vcvt_f16_f32(a);
}
-// CHECK-LABEL: @test_vcvt_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[VCVT_I]]
+// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+//
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
return vcvt_f32_s32(a);
}
-// CHECK-LABEL: @test_vcvt_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[VCVT_I]]
+// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+//
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
return vcvt_f32_u32(a);
}
-// CHECK-LABEL: @test_vcvtq_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[VCVT_I]]
+// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[VCVT_I]]
+//
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
return vcvtq_f32_s32(a);
}
-// CHECK-LABEL: @test_vcvtq_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[VCVT_I]]
+// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[VCVT_I]]
+//
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
return vcvtq_f32_u32(a);
}
-// CHECK-LABEL: @test_vcvt_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
-// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VCVT_F32_F161_I]]
+// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
+// CHECK-NEXT: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
return vcvt_f32_f16(a);
}
-// CHECK-LABEL: @test_vcvt_n_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
-// CHECK: ret <2 x float> [[VCVT_N1]]
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
+// CHECK-NEXT: ret <2 x float> [[VCVT_N1]]
+//
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
return vcvt_n_f32_s32(a, 1);
}
-// CHECK-LABEL: @test_vcvt_n_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
-// CHECK: ret <2 x float> [[VCVT_N1]]
+// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
+// CHECK-NEXT: ret <2 x float> [[VCVT_N1]]
+//
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
return vcvt_n_f32_u32(a, 1);
}
-// CHECK-LABEL: @test_vcvtq_n_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
-// CHECK: ret <4 x float> [[VCVT_N1]]
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
+// CHECK-NEXT: ret <4 x float> [[VCVT_N1]]
+//
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
return vcvtq_n_f32_s32(a, 3);
}
-// CHECK-LABEL: @test_vcvtq_n_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
-// CHECK: ret <4 x float> [[VCVT_N1]]
+// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
+// CHECK-NEXT: ret <4 x float> [[VCVT_N1]]
+//
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
return vcvtq_n_f32_u32(a, 3);
}
-// CHECK-LABEL: @test_vcvt_n_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
-// CHECK: ret <2 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VCVT_N1]]
+//
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
return vcvt_n_s32_f32(a, 1);
}
-// CHECK-LABEL: @test_vcvtq_n_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
-// CHECK: ret <4 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VCVT_N1]]
+//
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
return vcvtq_n_s32_f32(a, 3);
}
-// CHECK-LABEL: @test_vcvt_n_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
-// CHECK: ret <2 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VCVT_N1]]
+//
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
return vcvt_n_u32_f32(a, 1);
}
-// CHECK-LABEL: @test_vcvtq_n_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
-// CHECK: ret <4 x i32> [[VCVT_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
+// CHECK-NEXT: ret <4 x i32> [[VCVT_N1]]
+//
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
return vcvtq_n_u32_f32(a, 3);
}
-// CHECK-LABEL: @test_vcvt_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VCVT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fptosi <2 x float> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCVT_I]]
+//
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
return vcvt_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VCVT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCVT_I]]
+//
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
return vcvtq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvt_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VCVT_I]]
+// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fptoui <2 x float> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VCVT_I]]
+//
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
return vcvt_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VCVT_I]]
+// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VCVT_I:%.*]] = fptoui <4 x float> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VCVT_I]]
+//
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
return vcvtq_u32_f32(a);
}
-// CHECK-LABEL: @test_vdup_lane_u8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
return vdup_lane_u8(a, 7);
}
-// CHECK-LABEL: @test_vdup_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
return vdup_lane_u16(a, 3);
}
-// CHECK-LABEL: @test_vdup_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
return vdup_lane_u32(a, 1);
}
-// CHECK-LABEL: @test_vdup_lane_s8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
int8x8_t test_vdup_lane_s8(int8x8_t a) {
return vdup_lane_s8(a, 7);
}
-// CHECK-LABEL: @test_vdup_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
int16x4_t test_vdup_lane_s16(int16x4_t a) {
return vdup_lane_s16(a, 3);
}
-// CHECK-LABEL: @test_vdup_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
int32x2_t test_vdup_lane_s32(int32x2_t a) {
return vdup_lane_s32(a, 1);
}
-// CHECK-LABEL: @test_vdup_lane_p8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
return vdup_lane_p8(a, 7);
}
-// CHECK-LABEL: @test_vdup_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
return vdup_lane_p16(a, 3);
}
-// CHECK-LABEL: @test_vdup_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: ret <2 x float> [[LANE]]
+// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: ret <2 x float> [[LANE]]
+//
float32x2_t test_vdup_lane_f32(float32x2_t a) {
return vdup_lane_f32(a, 1);
}
-// CHECK-LABEL: @test_vdupq_lane_u8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <16 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
return vdupq_lane_u8(a, 7);
}
-// CHECK-LABEL: @test_vdupq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
return vdupq_lane_u16(a, 3);
}
-// CHECK-LABEL: @test_vdupq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
return vdupq_lane_u32(a, 1);
}
-// CHECK-LABEL: @test_vdupq_lane_s8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <16 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
return vdupq_lane_s8(a, 7);
}
-// CHECK-LABEL: @test_vdupq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
return vdupq_lane_s16(a, 3);
}
-// CHECK-LABEL: @test_vdupq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
return vdupq_lane_s32(a, 1);
}
-// CHECK-LABEL: @test_vdupq_lane_p8(
-// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-// CHECK: ret <16 x i8> [[SHUFFLE]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
return vdupq_lane_p8(a, 7);
}
-// CHECK-LABEL: @test_vdupq_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
return vdupq_lane_p16(a, 3);
}
-// CHECK-LABEL: @test_vdupq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: ret <4 x float> [[LANE]]
+// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: ret <4 x float> [[LANE]]
+//
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
return vdupq_lane_f32(a, 1);
}
-// CHECK-LABEL: @test_vdup_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
int64x1_t test_vdup_lane_s64(int64x1_t a) {
return vdup_lane_s64(a, 0);
}
-// CHECK-LABEL: @test_vdup_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
return vdup_lane_u64(a, 0);
}
-// CHECK-LABEL: @test_vdupq_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
return vdupq_lane_s64(a, 0);
}
-// CHECK-LABEL: @test_vdupq_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
return vdupq_lane_u64(a, 0);
}
-// CHECK-LABEL: @test_vdup_n_u8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
uint8x8_t test_vdup_n_u8(uint8_t a) {
return vdup_n_u8(a);
}
-// CHECK-LABEL: @test_vdup_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
uint16x4_t test_vdup_n_u16(uint16_t a) {
return vdup_n_u16(a);
}
-// CHECK-LABEL: @test_vdup_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VECINIT1_I]]
+//
uint32x2_t test_vdup_n_u32(uint32_t a) {
return vdup_n_u32(a);
}
-// CHECK-LABEL: @test_vdup_n_s8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
int8x8_t test_vdup_n_s8(int8_t a) {
return vdup_n_s8(a);
}
-// CHECK-LABEL: @test_vdup_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
int16x4_t test_vdup_n_s16(int16_t a) {
return vdup_n_s16(a);
}
-// CHECK-LABEL: @test_vdup_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VECINIT1_I]]
+//
int32x2_t test_vdup_n_s32(int32_t a) {
return vdup_n_s32(a);
}
-// CHECK-LABEL: @test_vdup_n_p8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
poly8x8_t test_vdup_n_p8(poly8_t a) {
return vdup_n_p8(a);
}
-// CHECK-LABEL: @test_vdup_n_p16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
poly16x4_t test_vdup_n_p16(poly16_t a) {
return vdup_n_p16(a);
}
-// CHECK-LABEL: @test_vdup_n_f16(
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: ret <4 x half> [[VECINIT3]]
+// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: ret <4 x half> [[VECINIT3]]
+//
float16x4_t test_vdup_n_f16(float16_t *a) {
return vdup_n_f16(*a);
}
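
Note that the f16 splat tests route the scalar through a pointer, so the first matched instruction is a load of a half rather than a direct use of an argument; presumably this keeps a bare __fp16 out of the C signature, whose by-value passing has varied across ABIs. A sketch of the same pattern:

#include <arm_neon.h>

// Illustrative sketch: one half load feeding four insertelements, as the
// CHECK lines above encode.
float16x4_t splat_from_mem(const float16_t *p) {
  return vdup_n_f16(*p);
}
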
-// CHECK-LABEL: @test_vdup_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
-// CHECK: ret <2 x float> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[A]], i32 1
+// CHECK-NEXT: ret <2 x float> [[VECINIT1_I]]
+//
float32x2_t test_vdup_n_f32(float32_t a) {
return vdup_n_f32(a);
}
-// CHECK-LABEL: @test_vdupq_n_u8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
uint8x16_t test_vdupq_n_u8(uint8_t a) {
return vdupq_n_u8(a);
}
-// CHECK-LABEL: @test_vdupq_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
uint16x8_t test_vdupq_n_u16(uint16_t a) {
return vdupq_n_u16(a);
}
-// CHECK-LABEL: @test_vdupq_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
+//
uint32x4_t test_vdupq_n_u32(uint32_t a) {
return vdupq_n_u32(a);
}
-// CHECK-LABEL: @test_vdupq_n_s8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
int8x16_t test_vdupq_n_s8(int8_t a) {
return vdupq_n_s8(a);
}
-// CHECK-LABEL: @test_vdupq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
int16x8_t test_vdupq_n_s16(int16_t a) {
return vdupq_n_s16(a);
}
-// CHECK-LABEL: @test_vdupq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
+//
int32x4_t test_vdupq_n_s32(int32_t a) {
return vdupq_n_s32(a);
}
-// CHECK-LABEL: @test_vdupq_n_p8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
poly8x16_t test_vdupq_n_p8(poly8_t a) {
return vdupq_n_p8(a);
}
-// CHECK-LABEL: @test_vdupq_n_p16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
poly16x8_t test_vdupq_n_p16(poly16_t a) {
return vdupq_n_p16(a);
}
-// CHECK-LABEL: @test_vdupq_n_f16(
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
-// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
-// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
-// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
-// CHECK: ret <8 x half> [[VECINIT7]]
+// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
+// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
+// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x half> [[VECINIT7]]
+//
float16x8_t test_vdupq_n_f16(float16_t *a) {
return vdupq_n_f16(*a);
}
-// CHECK-LABEL: @test_vdupq_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
-// CHECK: ret <4 x float> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[A]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VECINIT3_I]]
+//
float32x4_t test_vdupq_n_f32(float32_t a) {
return vdupq_n_f32(a);
}
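
For the vdup_n family the frontend emits no shuffles at all: the splat is built lane by lane with insertelement into a poison vector, and later passes are expected to recognize the chain as a splat. The same shape can be reproduced by hand with the lane-set intrinsics; a minimal sketch:

#include <arm_neon.h>

// Illustrative sketch: building the splat one lane at a time mirrors the
// insertelement chain matched above; vdupq_n_f32(x) is the direct spelling.
float32x4_t splat4_manual(float x) {
  float32x4_t v = vdupq_n_f32(0.0f);  // stand-in for the IR's poison vector
  v = vsetq_lane_f32(x, v, 0);
  v = vsetq_lane_f32(x, v, 1);
  v = vsetq_lane_f32(x, v, 2);
  v = vsetq_lane_f32(x, v, 3);
  return v;
}
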
-// CHECK-LABEL: @test_vdup_n_s64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vdup_n_s64(int64_t a) {
int64x1_t tmp = vdup_n_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: @test_vdup_n_u64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vdup_n_u64(uint64_t a) {
int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: @test_vdupq_n_s64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vdupq_n_s64(int64_t a) {
int64x2_t tmp = vdupq_n_s64(a);
return vaddq_s64(tmp, tmp);
}
-// CHECK-LABEL: @test_vdupq_n_u64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vdupq_n_u64(uint64_t a) {
uint64x2_t tmp = vdupq_n_u64(a);
return vaddq_u64(tmp, tmp);
}
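
Unlike the narrower variants, the 64-bit vdup_n tests pair the splat with a vadd, so the checks match an add of the splat against itself; this keeps the value live as a genuine vector operation instead of letting the whole function fold down to a lone insertelement. In intrinsic form:

#include <arm_neon.h>

// Illustrative sketch of the pattern these tests use: splat, then consume
// the splat with a vector add so it cannot be folded away.
int64x2_t dup_and_double(int64_t a) {
  int64x2_t t = vdupq_n_s64(a);
  return vaddq_s64(t, t);
}
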
-// CHECK-LABEL: @test_veor_s8(
-// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[XOR_I]]
+// CHECK-LABEL: define <8 x i8> @test_veor_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[XOR_I]]
+//
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
return veor_s8(a, b);
}
-// CHECK-LABEL: @test_veor_s16(
-// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[XOR_I]]
+// CHECK-LABEL: define <4 x i16> @test_veor_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[XOR_I]]
+//
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
return veor_s16(a, b);
}
-// CHECK-LABEL: @test_veor_s32(
-// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[XOR_I]]
+// CHECK-LABEL: define <2 x i32> @test_veor_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[XOR_I]]
+//
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
return veor_s32(a, b);
}
-// CHECK-LABEL: @test_veor_s64(
-// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[XOR_I]]
+// CHECK-LABEL: define <1 x i64> @test_veor_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[XOR_I]]
+//
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
return veor_s64(a, b);
}
-// CHECK-LABEL: @test_veor_u8(
-// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[XOR_I]]
+// CHECK-LABEL: define <8 x i8> @test_veor_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[XOR_I]]
+//
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
return veor_u8(a, b);
}
-// CHECK-LABEL: @test_veor_u16(
-// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[XOR_I]]
+// CHECK-LABEL: define <4 x i16> @test_veor_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[XOR_I]]
+//
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
return veor_u16(a, b);
}
-// CHECK-LABEL: @test_veor_u32(
-// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[XOR_I]]
+// CHECK-LABEL: define <2 x i32> @test_veor_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[XOR_I]]
+//
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
return veor_u32(a, b);
}
-// CHECK-LABEL: @test_veor_u64(
-// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[XOR_I]]
+// CHECK-LABEL: define <1 x i64> @test_veor_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[XOR_I]]
+//
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
return veor_u64(a, b);
}
-// CHECK-LABEL: @test_veorq_s8(
-// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[XOR_I]]
+// CHECK-LABEL: define <16 x i8> @test_veorq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[XOR_I]]
+//
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
return veorq_s8(a, b);
}
-// CHECK-LABEL: @test_veorq_s16(
-// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[XOR_I]]
+// CHECK-LABEL: define <8 x i16> @test_veorq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[XOR_I]]
+//
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
return veorq_s16(a, b);
}
-// CHECK-LABEL: @test_veorq_s32(
-// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[XOR_I]]
+// CHECK-LABEL: define <4 x i32> @test_veorq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[XOR_I]]
+//
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
return veorq_s32(a, b);
}
-// CHECK-LABEL: @test_veorq_s64(
-// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[XOR_I]]
+// CHECK-LABEL: define <2 x i64> @test_veorq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[XOR_I]]
+//
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
return veorq_s64(a, b);
}
-// CHECK-LABEL: @test_veorq_u8(
-// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[XOR_I]]
+// CHECK-LABEL: define <16 x i8> @test_veorq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[XOR_I]]
+//
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
return veorq_u8(a, b);
}
-// CHECK-LABEL: @test_veorq_u16(
-// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[XOR_I]]
+// CHECK-LABEL: define <8 x i16> @test_veorq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[XOR_I]]
+//
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
return veorq_u16(a, b);
}
-// CHECK-LABEL: @test_veorq_u32(
-// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[XOR_I]]
+// CHECK-LABEL: define <4 x i32> @test_veorq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[XOR_I]]
+//
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
return veorq_u32(a, b);
}
-// CHECK-LABEL: @test_veorq_u64(
-// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[XOR_I]]
+// CHECK-LABEL: define <2 x i64> @test_veorq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[XOR_I]]
+//
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
return veorq_u64(a, b);
}
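
The veor family is the simplest case in the file: every variant lowers to a single IR xor on the argument vectors, with no bitcasts, since xor is lane-width agnostic on integer vectors. For example:

#include <arm_neon.h>

// Illustrative sketch: a NEON exclusive-or is a single IR instruction.
uint32x4_t toggle_bits(uint32x4_t v, uint32x4_t mask) {
  return veorq_u32(v, mask);
}
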
-// CHECK-LABEL: @test_vext_s8(
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define <8 x i8> @test_vext_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
return vext_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vext_u8(
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define <8 x i8> @test_vext_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
return vext_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vext_p8(
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i8> [[VEXT]]
+// CHECK-LABEL: define <8 x i8> @test_vext_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i8> [[VEXT]]
+//
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
return vext_p8(a, b, 7);
}
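
The byte-granularity vext variants likewise need no casts: the lanes are already i8, so the extract is a bare shufflevector over the concatenated operands. A sketch of the semantics:

#include <arm_neon.h>

// Illustrative sketch: vext_p8(a, b, 7) yields { a[7], b[0..6] }, i.e. lanes
// 7..14 of the 16-lane concatenation a:b.
poly8x8_t take_tail(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}
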
-// CHECK-LABEL: @test_vext_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define <4 x i16> @test_vext_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
return vext_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vext_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define <4 x i16> @test_vext_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
return vext_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vext_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i16> [[VEXT]]
+// CHECK-LABEL: define <4 x i16> @test_vext_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i16> [[VEXT]]
+//
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
return vext_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vext_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i32> [[VEXT]]
+// CHECK-LABEL: define <2 x i32> @test_vext_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[VEXT]]
+//
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
return vext_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vext_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i32> [[VEXT]]
+// CHECK-LABEL: define <2 x i32> @test_vext_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i32> [[VEXT]]
+//
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
return vext_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vext_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[VEXT]]
+// CHECK-LABEL: define <1 x i64> @test_vext_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VEXT]]
+//
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
return vext_s64(a, b, 0);
}
-// CHECK-LABEL: @test_vext_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[VEXT]]
+// CHECK-LABEL: define <1 x i64> @test_vext_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[VEXT]]
+//
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
return vext_u64(a, b, 0);
}
-// CHECK-LABEL: @test_vext_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x float> [[VEXT]]
+// CHECK-LABEL: define <2 x float> @test_vext_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x float> [[VEXT]]
+//
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
return vext_f32(a, b, 1);
}
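
test_vext_f32 is where this patch's change is visible: the floating-point operands are now first bitcast to same-width integer vectors (<2 x float> to <2 x i32>) before the usual byte-level cast, which accounts for the extra TMP steps in the new checks. The semantics are unchanged; vext still selects a contiguous window of the concatenation:

#include <arm_neon.h>

// Illustrative sketch: vext_f32(a, b, 1) == { a[1], b[0] }, lanes 1..2 of a:b.
float32x2_t rotate_pair(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}
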
-// CHECK-LABEL: @test_vextq_s8(
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define <16 x i8> @test_vextq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
return vextq_s8(a, b, 15);
}
-// CHECK-LABEL: @test_vextq_u8(
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define <16 x i8> @test_vextq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
return vextq_u8(a, b, 15);
}
-// CHECK-LABEL: @test_vextq_p8(
-// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
-// CHECK: ret <16 x i8> [[VEXT]]
+// CHECK-LABEL: define <16 x i8> @test_vextq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+// CHECK-NEXT: ret <16 x i8> [[VEXT]]
+//
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
return vextq_p8(a, b, 15);
}
-// CHECK-LABEL: @test_vextq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define <8 x i16> @test_vextq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
return vextq_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vextq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define <8 x i16> @test_vextq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
return vextq_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vextq_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
-// CHECK: ret <8 x i16> [[VEXT]]
+// CHECK-LABEL: define <8 x i16> @test_vextq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+// CHECK-NEXT: ret <8 x i16> [[VEXT]]
+//
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
return vextq_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vextq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i32> [[VEXT]]
+// CHECK-LABEL: define <4 x i32> @test_vextq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[VEXT]]
+//
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
return vextq_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vextq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x i32> [[VEXT]]
+// CHECK-LABEL: define <4 x i32> @test_vextq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x i32> [[VEXT]]
+//
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
return vextq_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vextq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i64> [[VEXT]]
+// CHECK-LABEL: define <2 x i64> @test_vextq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[VEXT]]
+//
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
return vextq_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vextq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
-// CHECK: ret <2 x i64> [[VEXT]]
+// CHECK-LABEL: define <2 x i64> @test_vextq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
+// CHECK-NEXT: ret <2 x i64> [[VEXT]]
+//
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
return vextq_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vextq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-// CHECK: ret <4 x float> [[VEXT]]
+// CHECK-LABEL: define <4 x float> @test_vextq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-NEXT: ret <4 x float> [[VEXT]]
+//
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
return vextq_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vfma_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
-// CHECK: ret <2 x float> [[TMP3]]
+// CHECK-LABEL: define <2 x float> @test_vfma_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vfma_f32(a, b, c);
}
-// CHECK-LABEL: @test_vfmaq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
-// CHECK: ret <4 x float> [[TMP3]]
+// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[C]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vfmaq_f32(a, b, c);
}
-// CHECK-LABEL: @test_vfms_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %b
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
-// CHECK: ret <2 x float> [[TMP3]]
+// CHECK-LABEL: define <2 x float> @test_vfms_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[B]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP6]])
+// CHECK-NEXT: ret <2 x float> [[TMP9]]
+//
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vfms_f32(a, b, c);
}
-// CHECK-LABEL: @test_vfmsq_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %b
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
-// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
-// CHECK: ret <4 x float> [[TMP3]]
+// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[B]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[C]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP6]])
+// CHECK-NEXT: ret <4 x float> [[TMP9]]
+//
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vfmsq_f32(a, b, c);
}
-// CHECK-LABEL: @test_vget_high_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vget_high_s8(int8x16_t a) {
return vget_high_s8(a);
}
-// CHECK-LABEL: @test_vget_high_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vget_high_s16(int16x8_t a) {
return vget_high_s16(a);
}
-// CHECK-LABEL: @test_vget_high_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vget_high_s32(int32x4_t a) {
return vget_high_s32(a);
}
-// CHECK-LABEL: @test_vget_high_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
-// CHECK: ret <1 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> <i32 1>
+// CHECK-NEXT: ret <1 x i64> [[SHUFFLE_I]]
+//
int64x1_t test_vget_high_s64(int64x2_t a) {
return vget_high_s64(a);
}
-// CHECK-LABEL: @test_vget_high_f16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <4 x half> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x half> @test_vget_high_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
+//
float16x4_t test_vget_high_f16(float16x8_t a) {
return vget_high_f16(a);
}
-// CHECK-LABEL: @test_vget_high_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x float> @test_vget_high_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vget_high_f32(float32x4_t a) {
return vget_high_f32(a);
}
-// CHECK-LABEL: @test_vget_high_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vget_high_u8(uint8x16_t a) {
return vget_high_u8(a);
}
-// CHECK-LABEL: @test_vget_high_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vget_high_u16(uint16x8_t a) {
return vget_high_u16(a);
}
-// CHECK-LABEL: @test_vget_high_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vget_high_u32(uint32x4_t a) {
return vget_high_u32(a);
}
-// CHECK-LABEL: @test_vget_high_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
-// CHECK: ret <1 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> <i32 1>
+// CHECK-NEXT: ret <1 x i64> [[SHUFFLE_I]]
+//
uint64x1_t test_vget_high_u64(uint64x2_t a) {
return vget_high_u64(a);
}
-// CHECK-LABEL: @test_vget_high_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vget_high_p8(poly8x16_t a) {
return vget_high_p8(a);
}
-// CHECK-LABEL: @test_vget_high_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vget_high_p16(poly16x8_t a) {
return vget_high_p16(a);
}
-// CHECK-LABEL: @test_vget_lane_u8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define zeroext i8 @test_vget_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A]], i32 7
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
uint8_t test_vget_lane_u8(uint8x8_t a) {
return vget_lane_u8(a, 7);
}
-// CHECK-LABEL: @test_vget_lane_u16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define zeroext i16 @test_vget_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[A]], i32 3
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
uint16_t test_vget_lane_u16(uint16x4_t a) {
return vget_lane_u16(a, 3);
}
-// CHECK-LABEL: @test_vget_lane_u32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
-// CHECK: ret i32 [[VGET_LANE]]
+// CHECK-LABEL: define i32 @test_vget_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i32 1
+// CHECK-NEXT: ret i32 [[VGET_LANE]]
+//
uint32_t test_vget_lane_u32(uint32x2_t a) {
return vget_lane_u32(a, 1);
}
-// CHECK-LABEL: @test_vget_lane_s8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define signext i8 @test_vget_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A]], i32 7
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
int8_t test_vget_lane_s8(int8x8_t a) {
return vget_lane_s8(a, 7);
}
-// CHECK-LABEL: @test_vget_lane_s16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define signext i16 @test_vget_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[A]], i32 3
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
int16_t test_vget_lane_s16(int16x4_t a) {
return vget_lane_s16(a, 3);
}
-// CHECK-LABEL: @test_vget_lane_s32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
-// CHECK: ret i32 [[VGET_LANE]]
+// CHECK-LABEL: define i32 @test_vget_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[A]], i32 1
+// CHECK-NEXT: ret i32 [[VGET_LANE]]
+//
int32_t test_vget_lane_s32(int32x2_t a) {
return vget_lane_s32(a, 1);
}
-// CHECK-LABEL: @test_vget_lane_p8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define signext i8 @test_vget_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A]], i32 7
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
poly8_t test_vget_lane_p8(poly8x8_t a) {
return vget_lane_p8(a, 7);
}
-// CHECK-LABEL: @test_vget_lane_p16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define signext i16 @test_vget_lane_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[A]], i32 3
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
poly16_t test_vget_lane_p16(poly16x4_t a) {
return vget_lane_p16(a, 3);
}
-// CHECK-LABEL: @test_vget_lane_f32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
-// CHECK: ret float [[VGET_LANE]]
+// CHECK-LABEL: define float @test_vget_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x float> [[A]], i32 1
+// CHECK-NEXT: ret float [[VGET_LANE]]
+//
float32_t test_vget_lane_f32(float32x2_t a) {
return vget_lane_f32(a, 1);
}
-// CHECK-LABEL: @test_vget_lane_f16(
-// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
-// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
-// CHECK: store <4 x half> %a, ptr [[__REINT_242]], align 8
-// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_242]], align 8
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
-// CHECK: store i16 [[VGET_LANE]], ptr [[__REINT1_242]], align 2
-// CHECK: [[TMP5:%.*]] = load half, ptr [[__REINT1_242]], align 2
-// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
-// CHECK: ret float [[CONV]]
+// CHECK-LABEL: define float @test_vget_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
float32_t test_vget_lane_f16(float16x4_t a) {
return vget_lane_f16(a, 1);
}
-// CHECK-LABEL: @test_vgetq_lane_u8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define zeroext i8 @test_vgetq_lane_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[A]], i32 15
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
return vgetq_lane_u8(a, 15);
}
-// CHECK-LABEL: @test_vgetq_lane_u16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define zeroext i16 @test_vgetq_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[A]], i32 7
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
return vgetq_lane_u16(a, 7);
}
-// CHECK-LABEL: @test_vgetq_lane_u32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
-// CHECK: ret i32 [[VGET_LANE]]
+// CHECK-LABEL: define i32 @test_vgetq_lane_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i32 3
+// CHECK-NEXT: ret i32 [[VGET_LANE]]
+//
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
return vgetq_lane_u32(a, 3);
}
-// CHECK-LABEL: @test_vgetq_lane_s8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define signext i8 @test_vgetq_lane_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[A]], i32 15
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
int8_t test_vgetq_lane_s8(int8x16_t a) {
return vgetq_lane_s8(a, 15);
}
-// CHECK-LABEL: @test_vgetq_lane_s16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define signext i16 @test_vgetq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[A]], i32 7
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
int16_t test_vgetq_lane_s16(int16x8_t a) {
return vgetq_lane_s16(a, 7);
}
-// CHECK-LABEL: @test_vgetq_lane_s32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
-// CHECK: ret i32 [[VGET_LANE]]
+// CHECK-LABEL: define i32 @test_vgetq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[A]], i32 3
+// CHECK-NEXT: ret i32 [[VGET_LANE]]
+//
int32_t test_vgetq_lane_s32(int32x4_t a) {
return vgetq_lane_s32(a, 3);
}
-// CHECK-LABEL: @test_vgetq_lane_p8(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
-// CHECK: ret i8 [[VGET_LANE]]
+// CHECK-LABEL: define signext i8 @test_vgetq_lane_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[A]], i32 15
+// CHECK-NEXT: ret i8 [[VGET_LANE]]
+//
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
return vgetq_lane_p8(a, 15);
}
-// CHECK-LABEL: @test_vgetq_lane_p16(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
-// CHECK: ret i16 [[VGET_LANE]]
+// CHECK-LABEL: define signext i16 @test_vgetq_lane_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[A]], i32 7
+// CHECK-NEXT: ret i16 [[VGET_LANE]]
+//
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
return vgetq_lane_p16(a, 7);
}
-// CHECK-LABEL: @test_vgetq_lane_f32(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
-// CHECK: ret float [[VGET_LANE]]
+// CHECK-LABEL: define float @test_vgetq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x float> [[A]], i32 3
+// CHECK-NEXT: ret float [[VGET_LANE]]
+//
float32_t test_vgetq_lane_f32(float32x4_t a) {
return vgetq_lane_f32(a, 3);
}
-// CHECK-LABEL: @test_vgetq_lane_f16(
-// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
-// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
-// CHECK: store <8 x half> %a, ptr [[__REINT_244]], align 16
-// CHECK: [[TMP1:%.*]] = load <8 x i16>, ptr [[__REINT_244]], align 16
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
-// CHECK: store i16 [[VGET_LANE]], ptr [[__REINT1_244]], align 2
-// CHECK: [[TMP5:%.*]] = load half, ptr [[__REINT1_244]], align 2
-// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
-// CHECK: ret float [[CONV]]
+// CHECK-LABEL: define float @test_vgetq_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[VGET_LANE]] to half
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: ret float [[CONV]]
+//
float32_t test_vgetq_lane_f16(float16x8_t a) {
return vgetq_lane_f16(a, 3);
}
-// CHECK-LABEL: @test_vget_lane_s64(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
-// CHECK: ret i64 [[VGET_LANE]]
+// CHECK-LABEL: define i64 @test_vget_lane_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[A]], i32 0
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
int64_t test_vget_lane_s64(int64x1_t a) {
return vget_lane_s64(a, 0);
}
-// CHECK-LABEL: @test_vget_lane_u64(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
-// CHECK: ret i64 [[VGET_LANE]]
+// CHECK-LABEL: define i64 @test_vget_lane_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[A]], i32 0
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
uint64_t test_vget_lane_u64(uint64x1_t a) {
return vget_lane_u64(a, 0);
}
-// CHECK-LABEL: @test_vgetq_lane_s64(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
-// CHECK: ret i64 [[VGET_LANE]]
+// CHECK-LABEL: define i64 @test_vgetq_lane_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i32 1
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
int64_t test_vgetq_lane_s64(int64x2_t a) {
return vgetq_lane_s64(a, 1);
}
-// CHECK-LABEL: @test_vgetq_lane_u64(
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
-// CHECK: ret i64 [[VGET_LANE]]
+// CHECK-LABEL: define i64 @test_vgetq_lane_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[A]], i32 1
+// CHECK-NEXT: ret i64 [[VGET_LANE]]
+//
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
return vgetq_lane_u64(a, 1);
}
-// CHECK-LABEL: @test_vget_low_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vget_low_s8(int8x16_t a) {
return vget_low_s8(a);
}
-// CHECK-LABEL: @test_vget_low_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vget_low_s16(int16x8_t a) {
return vget_low_s16(a);
}
-// CHECK-LABEL: @test_vget_low_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vget_low_s32(int32x4_t a) {
return vget_low_s32(a);
}
-// CHECK-LABEL: @test_vget_low_s64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[SHUFFLE_I]]
+//
int64x1_t test_vget_low_s64(int64x2_t a) {
return vget_low_s64(a);
}
-// CHECK-LABEL: @test_vget_low_f16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x half> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x half> @test_vget_low_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]]
+//
float16x4_t test_vget_low_f16(float16x8_t a) {
return vget_low_f16(a);
}
-// CHECK-LABEL: @test_vget_low_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x float> @test_vget_low_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[A]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vget_low_f32(float32x4_t a) {
return vget_low_f32(a);
}
-// CHECK-LABEL: @test_vget_low_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vget_low_u8(uint8x16_t a) {
return vget_low_u8(a);
}
-// CHECK-LABEL: @test_vget_low_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vget_low_u16(uint16x8_t a) {
return vget_low_u16(a);
}
-// CHECK-LABEL: @test_vget_low_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vget_low_u32(uint32x4_t a) {
return vget_low_u32(a);
}
-// CHECK-LABEL: @test_vget_low_u64(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[SHUFFLE_I]]
+// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[A]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[SHUFFLE_I]]
+//
uint64x1_t test_vget_low_u64(uint64x2_t a) {
return vget_low_u64(a);
}
-// CHECK-LABEL: @test_vget_low_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vget_low_p8(poly8x16_t a) {
return vget_low_p8(a);
}
-// CHECK-LABEL: @test_vget_low_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vget_low_p16(poly16x8_t a) {
return vget_low_p16(a);
}
-// CHECK-LABEL: @test_vhadd_s8(
-// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VHADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]]
+//
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
return vhadd_s8(a, b);
}
-// CHECK-LABEL: @test_vhadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
return vhadd_s16(a, b);
}
-// CHECK-LABEL: @test_vhadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
return vhadd_s32(a, b);
}
-// CHECK-LABEL: @test_vhadd_u8(
-// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VHADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]]
+//
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
return vhadd_u8(a, b);
}
-// CHECK-LABEL: @test_vhadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
return vhadd_u16(a, b);
}
-// CHECK-LABEL: @test_vhadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]])
+// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
return vhadd_u32(a, b);
}
-// CHECK-LABEL: @test_vhaddq_s8(
-// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]]
+//
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
return vhaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vhaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
return vhaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vhaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
return vhaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vhaddq_u8(
-// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]]
+//
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
return vhaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vhaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
return vhaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vhaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]])
+// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
return vhaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vhsub_s8(
-// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VHSUB_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VHSUB_V_I]]
+//
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
return vhsub_s8(a, b);
}
-// CHECK-LABEL: @test_vhsub_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
return vhsub_s16(a, b);
}
-// CHECK-LABEL: @test_vhsub_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
return vhsub_s32(a, b);
}
-// CHECK-LABEL: @test_vhsub_u8(
-// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VHSUB_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VHSUB_V_I]]
+//
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
return vhsub_u8(a, b);
}
-// CHECK-LABEL: @test_vhsub_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VHSUB_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
return vhsub_u16(a, b);
}
-// CHECK-LABEL: @test_vhsub_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VHSUB_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]])
+// CHECK-NEXT: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
return vhsub_u32(a, b);
}
-// CHECK-LABEL: @test_vhsubq_s8(
-// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VHSUBQ_V_I]]
+//
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
return vhsubq_s8(a, b);
}
-// CHECK-LABEL: @test_vhsubq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
return vhsubq_s16(a, b);
}
-// CHECK-LABEL: @test_vhsubq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
return vhsubq_s32(a, b);
}
-// CHECK-LABEL: @test_vhsubq_u8(
-// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VHSUBQ_V_I]]
+//
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
return vhsubq_u8(a, b);
}
-// CHECK-LABEL: @test_vhsubq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
return vhsubq_u16(a, b);
}
-// CHECK-LABEL: @test_vhsubq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]])
+// CHECK-NEXT: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
return vhsubq_u32(a, b);
}
-// CHECK-LABEL: @test_vld1q_u8(
-// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
-// CHECK: ret <16 x i8> [[VLD1]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[VLD1]]
+//
uint8x16_t test_vld1q_u8(uint8_t const * a) {
return vld1q_u8(a);
}
-// CHECK-LABEL: @test_vld1q_u16(
-// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
-// CHECK: ret <8 x i16> [[VLD1]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <8 x i16> [[VLD1]]
+//
uint16x8_t test_vld1q_u16(uint16_t const * a) {
return vld1q_u16(a);
}
-// CHECK-LABEL: @test_vld1q_u32(
-// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %a, i32 4)
-// CHECK: ret <4 x i32> [[VLD1]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <4 x i32> [[VLD1]]
+//
uint32x4_t test_vld1q_u32(uint32_t const * a) {
return vld1q_u32(a);
}
-// CHECK-LABEL: @test_vld1q_u64(
-// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %a, i32 4)
-// CHECK: ret <2 x i64> [[VLD1]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <2 x i64> [[VLD1]]
+//
uint64x2_t test_vld1q_u64(uint64_t const * a) {
return vld1q_u64(a);
}
-// CHECK-LABEL: @test_vld1q_s8(
-// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
-// CHECK: ret <16 x i8> [[VLD1]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[VLD1]]
+//
int8x16_t test_vld1q_s8(int8_t const * a) {
return vld1q_s8(a);
}
-// CHECK-LABEL: @test_vld1q_s16(
-// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
-// CHECK: ret <8 x i16> [[VLD1]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <8 x i16> [[VLD1]]
+//
int16x8_t test_vld1q_s16(int16_t const * a) {
return vld1q_s16(a);
}
-// CHECK-LABEL: @test_vld1q_s32(
-// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %a, i32 4)
-// CHECK: ret <4 x i32> [[VLD1]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <4 x i32> [[VLD1]]
+//
int32x4_t test_vld1q_s32(int32_t const * a) {
return vld1q_s32(a);
}
-// CHECK-LABEL: @test_vld1q_s64(
-// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %a, i32 4)
-// CHECK: ret <2 x i64> [[VLD1]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <2 x i64> [[VLD1]]
+//
int64x2_t test_vld1q_s64(int64_t const * a) {
return vld1q_s64(a);
}
-// CHECK-LABEL: @test_vld1q_f16(
-// CHECK: [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0(ptr %a, i32 2)
-// CHECK: ret <8 x half> [[VLD1]]
+// CHECK-LABEL: define <8 x half> @test_vld1q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <8 x half> [[VLD1]]
+//
float16x8_t test_vld1q_f16(float16_t const * a) {
return vld1q_f16(a);
}
-// CHECK-LABEL: @test_vld1q_f32(
-// CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr %a, i32 4)
-// CHECK: ret <4 x float> [[VLD1]]
+// CHECK-LABEL: define <4 x float> @test_vld1q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <4 x float> [[VLD1]]
+//
float32x4_t test_vld1q_f32(float32_t const * a) {
return vld1q_f32(a);
}
-// CHECK-LABEL: @test_vld1q_p8(
-// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
-// CHECK: ret <16 x i8> [[VLD1]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <16 x i8> [[VLD1]]
+//
poly8x16_t test_vld1q_p8(poly8_t const * a) {
return vld1q_p8(a);
}
-// CHECK-LABEL: @test_vld1q_p16(
-// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
-// CHECK: ret <8 x i16> [[VLD1]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <8 x i16> [[VLD1]]
+//
poly16x8_t test_vld1q_p16(poly16_t const * a) {
return vld1q_p16(a);
}
-// CHECK-LABEL: @test_vld1_u8(
-// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
-// CHECK: ret <8 x i8> [[VLD1]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <8 x i8> [[VLD1]]
+//
uint8x8_t test_vld1_u8(uint8_t const * a) {
return vld1_u8(a);
}
-// CHECK-LABEL: @test_vld1_u16(
-// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
-// CHECK: ret <4 x i16> [[VLD1]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <4 x i16> [[VLD1]]
+//
uint16x4_t test_vld1_u16(uint16_t const * a) {
return vld1_u16(a);
}
-// CHECK-LABEL: @test_vld1_u32(
-// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %a, i32 4)
-// CHECK: ret <2 x i32> [[VLD1]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <2 x i32> [[VLD1]]
+//
uint32x2_t test_vld1_u32(uint32_t const * a) {
return vld1_u32(a);
}
-// CHECK-LABEL: @test_vld1_u64(
-// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
-// CHECK: ret <1 x i64> [[VLD1]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <1 x i64> [[VLD1]]
+//
uint64x1_t test_vld1_u64(uint64_t const * a) {
return vld1_u64(a);
}
-// CHECK-LABEL: @test_vld1_s8(
-// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
-// CHECK: ret <8 x i8> [[VLD1]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <8 x i8> [[VLD1]]
+//
int8x8_t test_vld1_s8(int8_t const * a) {
return vld1_s8(a);
}
-// CHECK-LABEL: @test_vld1_s16(
-// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
-// CHECK: ret <4 x i16> [[VLD1]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <4 x i16> [[VLD1]]
+//
int16x4_t test_vld1_s16(int16_t const * a) {
return vld1_s16(a);
}
-// CHECK-LABEL: @test_vld1_s32(
-// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %a, i32 4)
-// CHECK: ret <2 x i32> [[VLD1]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <2 x i32> [[VLD1]]
+//
int32x2_t test_vld1_s32(int32_t const * a) {
return vld1_s32(a);
}
-// CHECK-LABEL: @test_vld1_s64(
-// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
-// CHECK: ret <1 x i64> [[VLD1]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <1 x i64> [[VLD1]]
+//
int64x1_t test_vld1_s64(int64_t const * a) {
return vld1_s64(a);
}
-// CHECK-LABEL: @test_vld1_f16(
-// CHECK: [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0(ptr %a, i32 2)
-// CHECK: ret <4 x half> [[VLD1]]
+// CHECK-LABEL: define <4 x half> @test_vld1_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <4 x half> [[VLD1]]
+//
float16x4_t test_vld1_f16(float16_t const * a) {
return vld1_f16(a);
}
-// CHECK-LABEL: @test_vld1_f32(
-// CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0(ptr %a, i32 4)
-// CHECK: ret <2 x float> [[VLD1]]
+// CHECK-LABEL: define <2 x float> @test_vld1_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: ret <2 x float> [[VLD1]]
+//
float32x2_t test_vld1_f32(float32_t const * a) {
return vld1_f32(a);
}
-// CHECK-LABEL: @test_vld1_p8(
-// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
-// CHECK: ret <8 x i8> [[VLD1]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: ret <8 x i8> [[VLD1]]
+//
poly8x8_t test_vld1_p8(poly8_t const * a) {
return vld1_p8(a);
}
-// CHECK-LABEL: @test_vld1_p16(
-// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
-// CHECK: ret <4 x i16> [[VLD1]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: ret <4 x i16> [[VLD1]]
+//
poly16x4_t test_vld1_p16(poly16_t const * a) {
return vld1_p16(a);
}
-// CHECK-LABEL: @test_vld1q_dup_u8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
return vld1q_dup_u8(a);
}
-// CHECK-LABEL: @test_vld1q_dup_u16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
return vld1q_dup_u16(a);
}
-// CHECK-LABEL: @test_vld1q_dup_u32(
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
return vld1q_dup_u32(a);
}
-// CHECK-LABEL: @test_vld1q_dup_u64(
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
return vld1q_dup_u64(a);
}
-// CHECK-LABEL: @test_vld1q_dup_s8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
return vld1q_dup_s8(a);
}
-// CHECK-LABEL: @test_vld1q_dup_s16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
return vld1q_dup_s16(a);
}
-// CHECK-LABEL: @test_vld1q_dup_s32(
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i32> [[LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i32> [[LANE]]
+//
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
return vld1q_dup_s32(a);
}
-// CHECK-LABEL: @test_vld1q_dup_s64(
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i64> [[LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i64> [[LANE]]
+//
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
return vld1q_dup_s64(a);
}
-// CHECK-LABEL: @test_vld1q_dup_f16(
-// CHECK: [[TMP2:%.*]] = load half, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <8 x half> poison, half [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x half> [[LANE]]
+// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x half> [[LANE]]
+//
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
return vld1q_dup_f16(a);
}
-// CHECK-LABEL: @test_vld1q_dup_f32(
-// CHECK: [[TMP2:%.*]] = load float, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x float> [[LANE]]
+// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x float> [[LANE]]
+//
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
return vld1q_dup_f32(a);
}
-// CHECK-LABEL: @test_vld1q_dup_p8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK: ret <16 x i8> [[LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
return vld1q_dup_p8(a);
}
-// CHECK-LABEL: @test_vld1q_dup_p16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i16> [[LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i16> [[LANE]]
+//
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
return vld1q_dup_p16(a);
}
-// CHECK-LABEL: @test_vld1_dup_u8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
return vld1_dup_u8(a);
}
-// CHECK-LABEL: @test_vld1_dup_u16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
return vld1_dup_u16(a);
}
-// CHECK-LABEL: @test_vld1_dup_u32(
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
return vld1_dup_u32(a);
}
-// CHECK-LABEL: @test_vld1_dup_u64(
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
return vld1_dup_u64(a);
}
-// CHECK-LABEL: @test_vld1_dup_s8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
int8x8_t test_vld1_dup_s8(int8_t const * a) {
return vld1_dup_s8(a);
}
-// CHECK-LABEL: @test_vld1_dup_s16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
int16x4_t test_vld1_dup_s16(int16_t const * a) {
return vld1_dup_s16(a);
}
-// CHECK-LABEL: @test_vld1_dup_s32(
-// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x i32> [[LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x i32> [[LANE]]
+//
int32x2_t test_vld1_dup_s32(int32_t const * a) {
return vld1_dup_s32(a);
}
-// CHECK-LABEL: @test_vld1_dup_s64(
-// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
-// CHECK: ret <1 x i64> [[LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: ret <1 x i64> [[LANE]]
+//
int64x1_t test_vld1_dup_s64(int64_t const * a) {
return vld1_dup_s64(a);
}
-// CHECK-LABEL: @test_vld1_dup_f16(
-// CHECK: [[TMP2:%.*]] = load half, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <4 x half> poison, half [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x half> [[LANE]]
+// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x half> [[LANE]]
+//
float16x4_t test_vld1_dup_f16(float16_t const * a) {
return vld1_dup_f16(a);
}
-// CHECK-LABEL: @test_vld1_dup_f32(
-// CHECK: [[TMP2:%.*]] = load float, ptr %a, align 4
-// CHECK: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
-// CHECK: ret <2 x float> [[LANE]]
+// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer
+// CHECK-NEXT: ret <2 x float> [[LANE]]
+//
float32x2_t test_vld1_dup_f32(float32_t const * a) {
return vld1_dup_f32(a);
}
-// CHECK-LABEL: @test_vld1_dup_p8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK: ret <8 x i8> [[LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
return vld1_dup_p8(a);
}
-// CHECK-LABEL: @test_vld1_dup_p16(
-// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
-// CHECK: ret <4 x i16> [[LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer
+// CHECK-NEXT: ret <4 x i16> [[LANE]]
+//
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
return vld1_dup_p16(a);
}
-// CHECK-LABEL: @test_vld1q_lane_u8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
return vld1q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: @test_vld1q_lane_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
return vld1q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vld1q_lane_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
-// CHECK: ret <4 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VLD1_LANE]]
+//
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
return vld1q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vld1q_lane_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
-// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[VLD1Q_LANE]]
+//
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
return vld1q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vld1q_lane_s8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
return vld1q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: @test_vld1q_lane_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
return vld1q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vld1q_lane_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
-// CHECK: ret <4 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VLD1_LANE]]
+//
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
return vld1q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vld1q_lane_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
-// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
-// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
-// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: ret <2 x i64> [[VLD1Q_LANE]]
+//
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
return vld1q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vld1q_lane_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: [[TMP4:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
-// CHECK: ret <8 x half> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP3]], i32 7
+// CHECK-NEXT: ret <8 x half> [[VLD1_LANE]]
+//
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
return vld1q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vld1q_lane_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP4:%.*]] = load float, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
-// CHECK: ret <4 x float> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VLD1_LANE]]
+//
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
return vld1q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vld1q_lane_p8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
-// CHECK: ret <16 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
return vld1q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: @test_vld1q_lane_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
-// CHECK: ret <8 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VLD1_LANE]]
+//
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
return vld1q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vld1_lane_u8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
return vld1_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vld1_lane_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
return vld1_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vld1_lane_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
-// CHECK: ret <2 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VLD1_LANE]]
+//
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
return vld1_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vld1_lane_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
-// CHECK: ret <1 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VLD1_LANE]]
+//
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
return vld1_lane_u64(a, b, 0);
}
-// CHECK-LABEL: @test_vld1_lane_s8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
return vld1_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vld1_lane_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
return vld1_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vld1_lane_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
-// CHECK: ret <2 x i32> [[VLD1_LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VLD1_LANE]]
+//
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
return vld1_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vld1_lane_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = load i64, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
-// CHECK: ret <1 x i64> [[VLD1_LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VLD1_LANE]]
+//
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
return vld1_lane_s64(a, b, 0);
}
-// CHECK-LABEL: @test_vld1_lane_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: [[TMP4:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
-// CHECK: ret <4 x half> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP3]], i32 3
+// CHECK-NEXT: ret <4 x half> [[VLD1_LANE]]
+//
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
return vld1_lane_f16(a, b, 3);
}
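// For clarity (reviewer sketch, not part of the patch): the f16 checks above
// are where this patch's change is visible -- the <4 x half> operand is now
// bitcast through <4 x i16> before the usual <8 x i8> round-trip, rather than
// going straight to bytes. A minimal C model of the intrinsic's semantics,
// assuming clang's vector-subscript extension; the helper name is hypothetical:
static float16x4_t sketch_vld1_lane_f16(float16_t const * p, float16x4_t v) {
  v[3] = *p;  // one scalar load plus a lane-3 insert, as in the IR above
  return v;
}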
-// CHECK-LABEL: @test_vld1_lane_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP4:%.*]] = load float, ptr %a, align 4
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
-// CHECK: ret <2 x float> [[VLD1_LANE]]
+// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[A]], align 4
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i32 1
+// CHECK-NEXT: ret <2 x float> [[VLD1_LANE]]
+//
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
return vld1_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vld1_lane_p8(
-// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
-// CHECK: ret <8 x i8> [[VLD1_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
return vld1_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vld1_lane_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
-// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
-// CHECK: ret <4 x i16> [[VLD1_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[A]], align 2
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VLD1_LANE]]
+//
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
return vld1_lane_p16(a, b, 3);
}
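// For clarity (reviewer sketch, not part of the patch): every vld1_lane check
// above reduces to the same two IR steps, a scalar load followed by an
// insertelement at the requested lane; non-byte element types are first
// round-tripped through <8 x i8>. A generic C model, names hypothetical:
static void sketch_vld1_lane(unsigned char *vec, unsigned char const * p,
                             int lane, int elem_size) {
  for (int i = 0; i < elem_size; ++i)
    vec[lane * elem_size + i] = p[i];  // scalar load -> insertelement
}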
-// CHECK-LABEL: @test_vld2q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld2q_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
return vld2q_u8(a);
}
-// CHECK-LABEL: @test_vld2q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
return vld2q_u16(a);
}
-// CHECK-LABEL: @test_vld2q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld2q_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
return vld2q_u32(a);
}
-// CHECK-LABEL: @test_vld2q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld2q_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int8x16x2_t test_vld2q_s8(int8_t const * a) {
return vld2q_s8(a);
}
-// CHECK-LABEL: @test_vld2q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x2_t test_vld2q_s16(int16_t const * a) {
return vld2q_s16(a);
}
-// CHECK-LABEL: @test_vld2q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld2q_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x2_t test_vld2q_s32(int32_t const * a) {
return vld2q_s32(a);
}
-// CHECK-LABEL: @test_vld2q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld2q_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2.v8f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <8 x half> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x2_t test_vld2q_f16(float16_t const * a) {
return vld2q_f16(a);
}
-// CHECK-LABEL: @test_vld2q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld2q_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <4 x float> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x2_t test_vld2q_f32(float32_t const * a) {
return vld2q_f32(a);
}
-// CHECK-LABEL: @test_vld2q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld2q_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
return vld2q_p8(a);
}
-// CHECK-LABEL: @test_vld2q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 0
+// CHECK-NEXT: [[VLD2Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
return vld2q_p16(a);
}
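// For clarity (reviewer sketch, not part of the patch): all vld2q_* checks
// above share one shape -- call @llvm.arm.neon.vld2, extractvalue the two
// result vectors, then store them into the sret buffer at byte offsets 0 and
// 16. The intrinsic de-interleaves even/odd elements; a C model of what the
// u8 case computes, names hypothetical:
static void sketch_vld2q_u8(unsigned char sret[32], unsigned char const * p) {
  for (int i = 0; i < 16; ++i) {
    sret[i]      = p[2 * i];      // even elements -> first <16 x i8>, offset 0
    sret[16 + i] = p[2 * i + 1];  // odd elements -> second <16 x i8>, offset 16
  }
}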
-// CHECK-LABEL: @test_vld2_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
return vld2_u8(a);
}
-// CHECK-LABEL: @test_vld2_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
return vld2_u16(a);
}
-// CHECK-LABEL: @test_vld2_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld2_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_V]], 1
+// CHECK-NEXT: store <2 x i32> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
return vld2_u32(a);
}
-// CHECK-LABEL: @test_vld2_u64(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld2_u64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT64X1X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_V]], 1
+// CHECK-NEXT: store <1 x i64> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
return vld2_u64(a);
}
-// CHECK-LABEL: @test_vld2_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x2_t test_vld2_s8(int8_t const * a) {
return vld2_s8(a);
}
-// CHECK-LABEL: @test_vld2_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x2_t test_vld2_s16(int16_t const * a) {
return vld2_s16(a);
}
-// CHECK-LABEL: @test_vld2_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld2_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_V]], 1
+// CHECK-NEXT: store <2 x i32> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x2_t test_vld2_s32(int32_t const * a) {
return vld2_s32(a);
}
-// CHECK-LABEL: @test_vld2_s64(
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld2_s64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT64X1X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[VLD2_V]], 1
+// CHECK-NEXT: store <1 x i64> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int64x1x2_t test_vld2_s64(int64_t const * a) {
return vld2_s64(a);
}
-// CHECK-LABEL: @test_vld2_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld2_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2.v4f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_V]], 1
+// CHECK-NEXT: store <4 x half> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x2_t test_vld2_f16(float16_t const * a) {
return vld2_f16(a);
}
-// CHECK-LABEL: @test_vld2_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld2_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_V]], 1
+// CHECK-NEXT: store <2 x float> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x2_t test_vld2_f32(float32_t const * a) {
return vld2_f32(a);
}
-// CHECK-LABEL: @test_vld2_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
return vld2_p8(a);
}
-// CHECK-LABEL: @test_vld2_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD2_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 0
+// CHECK-NEXT: [[VLD2_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
return vld2_p16(a);
}
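// For clarity (reviewer sketch, not part of the patch): the 64-bit vld2_*
// checks above mirror the Q-register shape with 8-byte pieces -- piece 0 at
// offset 0, piece 1 at offset 8, both align 8, which is exactly the single
// `getelementptr inbounds i8, ptr %agg.result, i32 8` each check pins down.
static unsigned char *sketch_vld2_piece(unsigned char *sret, int idx) {
  return sret + idx * 8;  // hypothetical helper computing that GEP
}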
-// CHECK-LABEL: @test_vld2q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
return vld2q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vld2q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld2q_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
return vld2q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vld2q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
return vld2q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vld2q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld2q_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
return vld2q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vld2q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld2q_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr [[A]], <8 x half> [[TMP2]], <8 x half> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <8 x half> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
return vld2q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vld2q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld2q_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0(ptr [[A]], <4 x float> [[TMP2]], <4 x float> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <4 x float> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
return vld2q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vld2q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld2q_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD2Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], 1
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD2Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
return vld2q_lane_p16(a, b, 7);
}
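// For clarity (reviewer sketch, not part of the patch): the vld2q_lane checks
// above no longer stage %b through an alloca-plus-memcpy; the [4 x i64]
// coercion is rebuilt in registers -- words 0-1 become one <2 x i64> via two
// insertelements, words 2-3 the other, and each is then bitcast to the lane
// type. A C model of that re-assembly, names hypothetical:
static void sketch_rebuild_q_pair(unsigned long long const w[4],
                                  unsigned long long v0[2],
                                  unsigned long long v1[2]) {
  v0[0] = w[0]; v0[1] = w[1];  // insertelement x2 -> first <2 x i64>
  v1[0] = w[2]; v1[1] = w[3];  // insertelement x2 -> second <2 x i64>
}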
-// CHECK-LABEL: @test_vld2_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_lane_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
return vld2_lane_u8(a, b, 7);
}
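// For clarity (reviewer sketch, not part of the patch): for the 16-byte
// aggregate the [2 x i64] coercion is simpler still -- each i64 word is
// bitcast directly to <8 x i8> with no insertelement step. A C model assuming
// little-endian lane order, names hypothetical:
static void sketch_split_d_pair(unsigned long long const w[2],
                                unsigned char v0[8], unsigned char v1[8]) {
  for (int i = 0; i < 8; ++i) {
    v0[i] = (unsigned char)(w[0] >> (8 * i));  // bitcast i64 -> <8 x i8>
    v1[i] = (unsigned char)(w[1] >> (8 * i));
  }
}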
-// CHECK-LABEL: @test_vld2_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
return vld2_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vld2_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld2_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <2 x i32> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
return vld2_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vld2_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_lane_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
return vld2_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vld2_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
return vld2_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vld2_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld2_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <2 x i32> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
return vld2_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vld2_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld2_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr [[A]], <4 x half> [[TMP4]], <4 x half> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <4 x half> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
return vld2_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vld2_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld2_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0(ptr [[A]], <2 x float> [[TMP4]], <2 x float> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <2 x float> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
return vld2_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vld2_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld2_lane_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
return vld2_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vld2_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld2_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 0
+// CHECK-NEXT: [[VLD2_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], 1
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD2_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
return vld2_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vld3q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld3q_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
return vld3q_u8(a);
}
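A usage sketch of vld3q_u8 (hypothetical names): the intrinsic deinterleaves 48 consecutive bytes into three 16-lane vectors, for example to separate packed RGB data.

  #include <arm_neon.h>
  /* Sketch only: deinterleaves rgb[0..47] so that px.val[0] holds the R
     bytes, px.val[1] the G bytes and px.val[2] the B bytes. */
  static void split_rgb16(const uint8_t *rgb, uint8_t *r, uint8_t *g,
                          uint8_t *b) {
    uint8x16x3_t px = vld3q_u8(rgb);
    vst1q_u8(r, px.val[0]);
    vst1q_u8(g, px.val[1]);
    vst1q_u8(b, px.val[2]);
  }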
-// CHECK-LABEL: @test_vld3q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
return vld3q_u16(a);
}
-// CHECK-LABEL: @test_vld3q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld3q_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
return vld3q_u32(a);
}
-// CHECK-LABEL: @test_vld3q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld3q_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int8x16x3_t test_vld3q_s8(int8_t const * a) {
return vld3q_s8(a);
}
-// CHECK-LABEL: @test_vld3q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x3_t test_vld3q_s16(int16_t const * a) {
return vld3q_s16(a);
}
-// CHECK-LABEL: @test_vld3q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld3q_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x3_t test_vld3q_s32(int32_t const * a) {
return vld3q_s32(a);
}
-// CHECK-LABEL: @test_vld3q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld3q_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3.v8f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <8 x half> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x half> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x3_t test_vld3q_f16(float16_t const * a) {
return vld3q_f16(a);
}
-// CHECK-LABEL: @test_vld3q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld3q_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <4 x float> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x float> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x3_t test_vld3q_f32(float32_t const * a) {
return vld3q_f32(a);
}
-// CHECK-LABEL: @test_vld3q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld3q_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
return vld3q_p8(a);
}
-// CHECK-LABEL: @test_vld3q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 0
+// CHECK-NEXT: [[VLD3Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 1
+// CHECK-NEXT: [[VLD3Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
return vld3q_p16(a);
}
-// CHECK-LABEL: @test_vld3_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
return vld3_u8(a);
}
-// CHECK-LABEL: @test_vld3_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
return vld3_u16(a);
}
-// CHECK-LABEL: @test_vld3_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld3_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 2
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
return vld3_u32(a);
}
-// CHECK-LABEL: @test_vld3_u64(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld3_u64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT64X1X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 2
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
return vld3_u64(a);
}
-// CHECK-LABEL: @test_vld3_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x3_t test_vld3_s8(int8_t const * a) {
return vld3_s8(a);
}
-// CHECK-LABEL: @test_vld3_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x3_t test_vld3_s16(int16_t const * a) {
return vld3_s16(a);
}
-// CHECK-LABEL: @test_vld3_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld3_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], 2
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x3_t test_vld3_s32(int32_t const * a) {
return vld3_s32(a);
}
-// CHECK-LABEL: @test_vld3_s64(
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld3_s64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT64X1X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], 2
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <1 x i64> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int64x1x3_t test_vld3_s64(int64_t const * a) {
return vld3_s64(a);
}
-// CHECK-LABEL: @test_vld3_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld3_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3.v4f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_V]], 2
+// CHECK-NEXT: store <4 x half> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x half> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x3_t test_vld3_f16(float16_t const * a) {
return vld3_f16(a);
}
-// CHECK-LABEL: @test_vld3_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld3_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], 2
+// CHECK-NEXT: store <2 x float> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x float> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x3_t test_vld3_f32(float32_t const * a) {
return vld3_f32(a);
}
-// CHECK-LABEL: @test_vld3_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
return vld3_p8(a);
}
-// CHECK-LABEL: @test_vld3_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD3_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 0
+// CHECK-NEXT: [[VLD3_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 1
+// CHECK-NEXT: [[VLD3_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
return vld3_p16(a);
}
-// CHECK-LABEL: @test_vld3q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
return vld3q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vld3q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld3q_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
return vld3q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vld3q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
return vld3q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vld3q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld3q_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
return vld3q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vld3q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld3q_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr [[A]], <8 x half> [[TMP3]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <8 x half> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x half> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
return vld3q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vld3q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld3q_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0(ptr [[A]], <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <4 x float> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x float> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
return vld3q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vld3q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld3q_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X3_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD3Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], 2
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD3Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
return vld3q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vld3_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_lane_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
return vld3_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vld3_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
return vld3_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vld3_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld3_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
return vld3_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vld3_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_lane_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
return vld3_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vld3_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
return vld3_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vld3_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld3_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
return vld3_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vld3_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld3_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr [[A]], <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <4 x half> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x half> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
return vld3_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vld3_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld3_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0(ptr [[A]], <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <2 x float> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x float> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
return vld3_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vld3_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld3_lane_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
return vld3_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vld3_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld3_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X3_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 0
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 1
+// CHECK-NEXT: [[VLD3_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], 2
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD3_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
return vld3_lane_p16(a, b, 3);
}
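(The vld4/vld4q tests that follow take no coerced vector arguments, so their regenerated checks only pin down the @llvm.arm.neon.vld4.* call plus the four extractvalue/store pairs into the sret result. A minimal sketch of the pattern, again assuming a 32-bit ARM NEON target and an illustrative helper name, not taken from the patch:

#include <arm_neon.h>

/* A single structured load deinterleaves 64 contiguous bytes into four
   16-byte vectors, e.g. splitting interleaved RGBA data into
   per-channel planes. */
uint8x16x4_t load_four_planes(const uint8_t *src) {
  return vld4q_u8(src);
}
)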
-// CHECK-LABEL: @test_vld4q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld4q_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
return vld4q_u8(a);
}
-// CHECK-LABEL: @test_vld4q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
return vld4q_u16(a);
}
-// CHECK-LABEL: @test_vld4q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld4q_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
return vld4q_u32(a);
}
-// CHECK-LABEL: @test_vld4q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld4q_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int8x16x4_t test_vld4q_s8(int8_t const * a) {
return vld4q_s8(a);
}
-// CHECK-LABEL: @test_vld4q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x4_t test_vld4q_s16(int16_t const * a) {
return vld4q_s16(a);
}
-// CHECK-LABEL: @test_vld4q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld4q_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x4_t test_vld4q_s32(int32_t const * a) {
return vld4q_s32(a);
}
-// CHECK-LABEL: @test_vld4q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld4q_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4.v8f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <8 x half> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x half> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x half> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x4_t test_vld4q_f16(float16_t const * a) {
return vld4q_f16(a);
}
-// CHECK-LABEL: @test_vld4q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld4q_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <4 x float> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x float> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x float> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x4_t test_vld4q_f32(float32_t const * a) {
return vld4q_f32(a);
}
-// CHECK-LABEL: @test_vld4q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
+// CHECK-LABEL: define void @test_vld4q_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <16 x i8> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
return vld4q_p8(a);
}
-// CHECK-LABEL: @test_vld4q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4Q_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 0
+// CHECK-NEXT: [[VLD4Q_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 1
+// CHECK-NEXT: [[VLD4Q_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 2
+// CHECK-NEXT: [[VLD4Q_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
return vld4q_p16(a);
}
-// CHECK-LABEL: @test_vld4_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
return vld4_u8(a);
}
-// CHECK-LABEL: @test_vld4_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
return vld4_u16(a);
}
-// CHECK-LABEL: @test_vld4_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld4_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 3
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
return vld4_u32(a);
}
-// CHECK-LABEL: @test_vld4_u64(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld4_u64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT64X1X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 3
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
return vld4_u64(a);
}
-// CHECK-LABEL: @test_vld4_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x4_t test_vld4_s8(int8_t const * a) {
return vld4_s8(a);
}
-// CHECK-LABEL: @test_vld4_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x4_t test_vld4_s16(int16_t const * a) {
return vld4_s16(a);
}
-// CHECK-LABEL: @test_vld4_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld4_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], 3
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x i32> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x4_t test_vld4_s32(int32_t const * a) {
return vld4_s32(a);
}
-// CHECK-LABEL: @test_vld4_s64(
-// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
+// CHECK-LABEL: define void @test_vld4_s64(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT64X1X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], 3
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <1 x i64> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int64x1x4_t test_vld4_s64(int64_t const * a) {
return vld4_s64(a);
}
-// CHECK-LABEL: @test_vld4_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld4_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4.v4f16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_V]], 3
+// CHECK-NEXT: store <4 x half> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x half> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x half> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x4_t test_vld4_f16(float16_t const * a) {
return vld4_f16(a);
}
-// CHECK-LABEL: @test_vld4_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld4_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0(ptr [[A]], i32 4)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], 3
+// CHECK-NEXT: store <2 x float> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x float> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x float> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x4_t test_vld4_f32(float32_t const * a) {
return vld4_f32(a);
}
-// CHECK-LABEL: @test_vld4_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0(ptr [[A]], i32 1)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
return vld4_p8(a);
}
-// CHECK-LABEL: @test_vld4_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0(ptr [[A]], i32 2)
+// CHECK-NEXT: [[VLD4_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 0
+// CHECK-NEXT: [[VLD4_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 1
+// CHECK-NEXT: [[VLD4_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 2
+// CHECK-NEXT: [[VLD4_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
return vld4_p16(a);
}
-// CHECK-LABEL: @test_vld4q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
return vld4q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vld4q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld4q_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
return vld4q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vld4q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
return vld4q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vld4q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
+// CHECK-LABEL: define void @test_vld4q_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x i32> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
return vld4q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vld4q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
+// CHECK-LABEL: define void @test_vld4q_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr [[A]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x half> [[TMP6]], <8 x half> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <8 x half> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x half> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x half> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x half> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
return vld4q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vld4q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
+// CHECK-LABEL: define void @test_vld4q_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0(ptr [[A]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <4 x float> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x float> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <4 x float> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <4 x float> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
return vld4q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vld4q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
-// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
+// CHECK-LABEL: define void @test_vld4q_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X4_T:%.*]]) align 16 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 0
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 1
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 2
+// CHECK-NEXT: [[VLD4Q_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], 3
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 32
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 48
+// CHECK-NEXT: store <8 x i16> [[VLD4Q_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 16
+// CHECK-NEXT: ret void
+//
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
return vld4q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vld4_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_lane_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
return vld4_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vld4_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_lane_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
return vld4_lane_u16(a, b, 3);
}
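The [4 x i64] B_COERCE pattern in the u16 checks is a pure reinterpretation: each i64 of the coerced uint16x4x4_t argument is bitcast to one <4 x i16> member. A minimal sketch of that reinterpretation, assuming a little-endian target and using memcpy in place of the IR bitcast (show_coercion() is an illustrative name, not an API):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void show_coercion(const uint64_t coerced[4]) {
  for (int i = 0; i < 4; i++) {
    uint16_t lanes[4];
    memcpy(lanes, &coerced[i], sizeof lanes); /* i64 -> <4 x i16>, bit for bit */
    printf("member %d: %u %u %u %u\n", i, (unsigned)lanes[0],
           (unsigned)lanes[1], (unsigned)lanes[2], (unsigned)lanes[3]);
  }
}

int main(void) {
  const uint64_t b[4] = {0x0004000300020001ULL, 0, 0, 0};
  show_coercion(b); /* member 0: 1 2 3 4 on little-endian */
  return 0;
}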
-// CHECK-LABEL: @test_vld4_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld4_lane_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
return vld4_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vld4_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_lane_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
return vld4_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vld4_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_lane_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
return vld4_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vld4_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
+// CHECK-LABEL: define void @test_vld4_lane_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x i32> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
return vld4_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vld4_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
+// CHECK-LABEL: define void @test_vld4_lane_f16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr [[A]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <4 x half> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x half> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x half> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x half> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
return vld4_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vld4_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
+// CHECK-LABEL: define void @test_vld4_lane_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0(ptr [[A]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <2 x float> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <2 x float> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <2 x float> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <2 x float> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
return vld4_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vld4_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
+// CHECK-LABEL: define void @test_vld4_lane_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <8 x i8> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
return vld4_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vld4_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
-// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
-// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
+// CHECK-LABEL: define void @test_vld4_lane_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X4_T:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_0_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 0
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_1_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 1
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_2_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 2
+// CHECK-NEXT: [[VLD4_LANE_V_FCA_3_EXTRACT:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], 3
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_0_EXTRACT]], ptr [[AGG_RESULT]], align 8
+// CHECK-NEXT: [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 8
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_1_EXTRACT]], ptr [[__RET_SROA_2_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 16
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_2_EXTRACT]], ptr [[__RET_SROA_3_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT]], i32 24
+// CHECK-NEXT: store <4 x i16> [[VLD4_LANE_V_FCA_3_EXTRACT]], ptr [[__RET_SROA_4_0_AGG_RESULT_SROA_IDX]], align 8
+// CHECK-NEXT: ret void
+//
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
return vld4_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vmax_s8(
-// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMAX_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmax_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMAX_V_I]]
+//
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
return vmax_s8(a, b);
}
-// CHECK-LABEL: @test_vmax_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VMAX_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmax_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]])
+// CHECK-NEXT: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
return vmax_s16(a, b);
}
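The vmax_s16 checks keep the historical round trip through <8 x i8>, but the intrinsic is now fed the bitcast results rather than the raw arguments; the lane-wise semantics are unchanged. A minimal reference model of those semantics in plain C (ref_vmax_s16() and the arrays standing in for int16x4_t are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Lane-wise signed maximum, the operation vmax_s16 computes per lane. */
static void ref_vmax_s16(const int16_t a[4], const int16_t b[4], int16_t out[4]) {
  for (int i = 0; i < 4; i++)
    out[i] = a[i] > b[i] ? a[i] : b[i];
}

int main(void) {
  const int16_t a[4] = {-1, 5, 3, -7}, b[4] = {2, 4, 3, -9};
  int16_t r[4];
  ref_vmax_s16(a, b, r);
  printf("%d %d %d %d\n", r[0], r[1], r[2], r[3]); /* 2 5 3 -7 */
  return 0;
}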
-// CHECK-LABEL: @test_vmax_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VMAX_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmax_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]])
+// CHECK-NEXT: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
return vmax_s32(a, b);
}
-// CHECK-LABEL: @test_vmax_u8(
-// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMAX_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmax_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMAX_V_I]]
+//
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
return vmax_u8(a, b);
}
-// CHECK-LABEL: @test_vmax_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VMAX_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmax_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]])
+// CHECK-NEXT: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
return vmax_u16(a, b);
}
-// CHECK-LABEL: @test_vmax_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VMAX_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmax_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]])
+// CHECK-NEXT: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
return vmax_u32(a, b);
}
-// CHECK-LABEL: @test_vmax_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VMAX_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vmax_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> [[VMAX_V_I]], <2 x float> [[VMAX_V1_I]])
+// CHECK-NEXT: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
return vmax_f32(a, b);
}
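For the float variant the new checks add an extra hop through an integer vector (<2 x float> -> <2 x i32> -> <8 x i8> and back), so the whole chain must be bit-preserving rather than value-converting. A minimal sketch of that round trip for a single lane, with memcpy standing in for the IR bitcasts:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  float f = 1.5f;
  uint32_t bits;
  float back;
  memcpy(&bits, &f, sizeof bits);    /* the float -> i32 step, one lane */
  memcpy(&back, &bits, sizeof back); /* and back: payload untouched */
  printf("0x%08x -> %g\n", (unsigned)bits, (double)back); /* 0x3fc00000 -> 1.5 */
  return 0;
}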
-// CHECK-LABEL: @test_vmaxq_s8(
-// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMAXQ_V_I]]
+//
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
return vmaxq_s8(a, b);
}
-// CHECK-LABEL: @test_vmaxq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]])
+// CHECK-NEXT: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
return vmaxq_s16(a, b);
}
-// CHECK-LABEL: @test_vmaxq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]])
+// CHECK-NEXT: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
return vmaxq_s32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u8(
-// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMAXQ_V_I]]
+//
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
return vmaxq_u8(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]])
+// CHECK-NEXT: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
return vmaxq_u16(a, b);
}
-// CHECK-LABEL: @test_vmaxq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]])
+// CHECK-NEXT: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
return vmaxq_u32(a, b);
}
-// CHECK-LABEL: @test_vmaxq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VMAXQ_V2_I]]
+// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> [[VMAXQ_V_I]], <4 x float> [[VMAXQ_V1_I]])
+// CHECK-NEXT: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
return vmaxq_f32(a, b);
}
-// CHECK-LABEL: @test_vmin_s8(
-// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMIN_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmin_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMIN_V_I]]
+//
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
return vmin_s8(a, b);
}
-// CHECK-LABEL: @test_vmin_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VMIN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmin_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]])
+// CHECK-NEXT: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
return vmin_s16(a, b);
}
-// CHECK-LABEL: @test_vmin_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VMIN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmin_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]])
+// CHECK-NEXT: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
return vmin_s32(a, b);
}
-// CHECK-LABEL: @test_vmin_u8(
-// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMIN_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmin_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMIN_V_I]]
+//
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
return vmin_u8(a, b);
}
-// CHECK-LABEL: @test_vmin_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VMIN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmin_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]])
+// CHECK-NEXT: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
return vmin_u16(a, b);
}
-// CHECK-LABEL: @test_vmin_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VMIN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmin_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]])
+// CHECK-NEXT: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
return vmin_u32(a, b);
}
-// CHECK-LABEL: @test_vmin_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VMIN_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vmin_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> [[VMIN_V_I]], <2 x float> [[VMIN_V1_I]])
+// CHECK-NEXT: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
return vmin_f32(a, b);
}
-// CHECK-LABEL: @test_vminq_s8(
-// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMINQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vminq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMINQ_V_I]]
+//
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
return vminq_s8(a, b);
}
-// CHECK-LABEL: @test_vminq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VMINQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vminq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]])
+// CHECK-NEXT: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
return vminq_s16(a, b);
}
-// CHECK-LABEL: @test_vminq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VMINQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vminq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]])
+// CHECK-NEXT: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
return vminq_s32(a, b);
}
-// CHECK-LABEL: @test_vminq_u8(
-// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMINQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vminq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMINQ_V_I]]
+//
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
return vminq_u8(a, b);
}
-// CHECK-LABEL: @test_vminq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VMINQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vminq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]])
+// CHECK-NEXT: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
return vminq_u16(a, b);
}
-// CHECK-LABEL: @test_vminq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VMINQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vminq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]])
+// CHECK-NEXT: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
return vminq_u32(a, b);
}
-// CHECK-LABEL: @test_vminq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VMINQ_V2_I]]
+// CHECK-LABEL: define <4 x float> @test_vminq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> [[VMINQ_V_I]], <4 x float> [[VMINQ_V1_I]])
+// CHECK-NEXT: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
return vminq_f32(a, b);
}
-// CHECK-LABEL: @test_vmla_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmla_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmla_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmla_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmla_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmla_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmla_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmla_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
-// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
-// CHECK: ret <2 x float> [[ADD_I]]
+// CHECK-LABEL: define <2 x float> @test_vmla_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmla_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmla_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
-// CHECK: ret <8 x i8> [[ADD_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmla_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[ADD_I]]
+//
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmla_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmla_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmla_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmla_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmla_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlaq_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlaq_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlaq_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
-// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
-// CHECK: ret <4 x float> [[ADD_I]]
+// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlaq_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
-// CHECK: ret <16 x i8> [[ADD_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[ADD_I]]
+//
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlaq_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlaq_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlaq_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_s8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlal_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMULL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlal_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <4 x i32> [[ADD]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD]]
+//
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlal_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <2 x i64> [[ADD]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD]]
+//
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlal_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <4 x i32> [[ADD]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD]]
+//
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlal_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <2 x i64> [[ADD]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD]]
+//
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlal_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vmlal_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vmlal_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlal_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlal_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[ADD_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[ADD_I]]
+//
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlal_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmla_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i16> [[ADD]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i16> [[ADD]]
+//
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmla_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmla_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x i32> [[ADD]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x i32> [[ADD]]
+//
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmla_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmla_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i16> [[ADD]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i16> [[ADD]]
+//
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmla_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmla_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x i32> [[ADD]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x i32> [[ADD]]
+//
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmla_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmla_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x float> [[ADD]]
+// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x float> [[ADD]]
+//
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmla_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlaq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <8 x i16> [[ADD]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <8 x i16> [[ADD]]
+//
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
return vmlaq_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlaq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i32> [[ADD]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i32> [[ADD]]
+//
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
return vmlaq_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlaq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <8 x i16> [[ADD]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <8 x i16> [[ADD]]
+//
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
return vmlaq_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlaq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i32> [[ADD]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i32> [[ADD]]
+//
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
return vmlaq_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlaq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
-// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x float> [[ADD]]
+// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x float> [[ADD]]
+//
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
return vmlaq_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmla_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
return vmla_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmla_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
return vmla_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmla_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
-// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[ADD_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[ADD_I]]
+//
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmla_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmla_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
-// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[ADD_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[ADD_I]]
+//
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmla_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmla_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
-// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
-// CHECK: ret <2 x float> [[ADD_I]]
+// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[ADD_I]]
+//
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmla_n_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[VECINIT7_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
return vmlaq_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
return vmlaq_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
-// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[ADD_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[VECINIT7_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[ADD_I]]
+//
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlaq_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
-// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[ADD_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
+//
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlaq_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlaq_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
-// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
-// CHECK: ret <4 x float> [[ADD_I]]
+// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[ADD_I]]
+//
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
return vmlaq_n_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmls_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmls_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmls_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmls_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmls_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmls_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
-// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define <2 x float> @test_vmls_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmls_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmls_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmls_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmls_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmls_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmls_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmls_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlsq_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlsq_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlsq_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
-// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlsq_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlsq_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlsq_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[C]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlsq_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlsl_s8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u8(
-// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMULL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlsl_u8(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <4 x i32> [[SUB]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB]]
+//
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlsl_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <2 x i64> [[SUB]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB]]
+//
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlsl_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <4 x i32> [[SUB]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB]]
+//
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlsl_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
-// CHECK: ret <2 x i64> [[SUB]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB]]
+//
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlsl_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vmlsl_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vmlsl_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlsl_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsl_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMULL2_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlsl_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i16> [[SUB]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i16> [[SUB]]
+//
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmls_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmls_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x i32> [[SUB]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x i32> [[SUB]]
+//
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmls_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmls_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i16> [[SUB]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i16> [[SUB]]
+//
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmls_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmls_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x i32> [[SUB]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x i32> [[SUB]]
+//
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmls_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmls_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
-// CHECK: ret <2 x float> [[SUB]]
+// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = fsub <2 x float> [[A]], [[MUL]]
+// CHECK-NEXT: ret <2 x float> [[SUB]]
+//
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmls_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlsq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <8 x i16> [[SUB]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <8 x i16> [[SUB]]
+//
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
return vmlsq_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlsq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i32> [[SUB]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i32> [[SUB]]
+//
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
return vmlsq_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlsq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
-// CHECK: ret <8 x i16> [[SUB]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A]], [[MUL]]
+// CHECK-NEXT: ret <8 x i16> [[SUB]]
+//
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
return vmlsq_lane_u16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vmlsq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x i32> [[SUB]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x i32> [[SUB]]
+//
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
return vmlsq_lane_u32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmlsq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
-// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
-// CHECK: ret <4 x float> [[SUB]]
+// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <2 x float> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[C]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[B]], [[LANE]]
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x float> [[A]], [[MUL]]
+// CHECK-NEXT: ret <4 x float> [[SUB]]
+//
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
return vmlsq_lane_f32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vmls_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
return vmls_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmls_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
return vmls_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmls_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmls_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmls_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmls_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
-// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[C]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[B]], [[VECINIT1_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmls_n_f32(a, b, c);
}
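// The _n_ variants splat the scalar via the insertelement chains checked
// above. A minimal equivalence sketch under the same <arm_neon.h>
// assumptions (helper name illustrative only):
static inline float32x2_t vmls_n_f32_by_hand(float32x2_t a, float32x2_t b,
                                             float32_t c) {
  return vsub_f32(a, vmul_f32(b, vdup_n_f32(c))); // a - b * splat(c)
}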
-// CHECK-LABEL: @test_vmlsq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[VECINIT7_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
return vmlsq_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
return vmlsq_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], i16 noundef zeroext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B]], [[VECINIT7_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlsq_n_u16(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlsq_n_u32(a, b, c);
}
-// CHECK-LABEL: @test_vmlsq_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
-// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[B]], [[VECINIT3_I]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[A]], [[MUL_I]]
+// CHECK-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
return vmlsq_n_f32(a, b, c);
}
-// CHECK-LABEL: @test_vmovl_s8(
-// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
int16x8_t test_vmovl_s8(int8x8_t a) {
return vmovl_s8(a);
}
-// CHECK-LABEL: @test_vmovl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
int32x4_t test_vmovl_s16(int16x4_t a) {
return vmovl_s16(a);
}
-// CHECK-LABEL: @test_vmovl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
int64x2_t test_vmovl_s32(int32x2_t a) {
return vmovl_s32(a);
}
-// CHECK-LABEL: @test_vmovl_u8(
-// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[VMOVL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]]
+//
uint16x8_t test_vmovl_u8(uint8x8_t a) {
return vmovl_u8(a);
}
-// CHECK-LABEL: @test_vmovl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[VMOVL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+//
uint32x4_t test_vmovl_u16(uint16x4_t a) {
return vmovl_u16(a);
}
-// CHECK-LABEL: @test_vmovl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[VMOVL_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]]
+//
uint64x2_t test_vmovl_u32(uint32x2_t a) {
return vmovl_u32(a);
}
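// The vmovl_* widening moves lower to a single sext/zext; the bitcast
// round-trip through <8 x i8> is value-preserving and foldable. A
// lane-by-lane sketch (illustrative helper; relies on Clang's vector
// subscripting extension):
static inline int32x4_t vmovl_s16_by_hand(int16x4_t a) {
  int32x4_t r = {a[0], a[1], a[2], a[3]}; // each lane sign-extends
  return r;
}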
-// CHECK-LABEL: @test_vmovn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[VMOVN_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VMOVN_I]]
+//
int8x8_t test_vmovn_s16(int16x8_t a) {
return vmovn_s16(a);
}
-// CHECK-LABEL: @test_vmovn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[VMOVN_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VMOVN_I]]
+//
int16x4_t test_vmovn_s32(int32x4_t a) {
return vmovn_s32(a);
}
-// CHECK-LABEL: @test_vmovn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VMOVN_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VMOVN_I]]
+//
int32x2_t test_vmovn_s64(int64x2_t a) {
return vmovn_s64(a);
}
-// CHECK-LABEL: @test_vmovn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[VMOVN_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VMOVN_I]]
+//
uint8x8_t test_vmovn_u16(uint16x8_t a) {
return vmovn_u16(a);
}
-// CHECK-LABEL: @test_vmovn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[VMOVN_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VMOVN_I]]
+//
uint16x4_t test_vmovn_u32(uint32x4_t a) {
return vmovn_u32(a);
}
-// CHECK-LABEL: @test_vmovn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[VMOVN_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VMOVN_I]]
+//
uint32x2_t test_vmovn_u64(uint64x2_t a) {
return vmovn_u64(a);
}
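// The vmovn_* narrowing moves are the mirror image: one trunc per lane.
// A sketch of the s32 case under the same assumptions:
static inline int16x4_t vmovn_s32_by_hand(int32x4_t a) {
  int16x4_t r = {(int16_t)a[0], (int16_t)a[1],
                 (int16_t)a[2], (int16_t)a[3]}; // plain truncation
  return r;
}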
-// CHECK-LABEL: @test_vmov_n_u8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
uint8x8_t test_vmov_n_u8(uint8_t a) {
return vmov_n_u8(a);
}
-// CHECK-LABEL: @test_vmov_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
uint16x4_t test_vmov_n_u16(uint16_t a) {
return vmov_n_u16(a);
}
-// CHECK-LABEL: @test_vmov_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VECINIT1_I]]
+//
uint32x2_t test_vmov_n_u32(uint32_t a) {
return vmov_n_u32(a);
}
-// CHECK-LABEL: @test_vmov_n_s8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
int8x8_t test_vmov_n_s8(int8_t a) {
return vmov_n_s8(a);
}
-// CHECK-LABEL: @test_vmov_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
int16x4_t test_vmov_n_s16(int16_t a) {
return vmov_n_s16(a);
}
-// CHECK-LABEL: @test_vmov_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VECINIT1_I]]
+//
int32x2_t test_vmov_n_s32(int32_t a) {
return vmov_n_s32(a);
}
-// CHECK-LABEL: @test_vmov_n_p8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VECINIT7_I]]
+//
poly8x8_t test_vmov_n_p8(poly8_t a) {
return vmov_n_p8(a);
}
-// CHECK-LABEL: @test_vmov_n_p16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VECINIT3_I]]
+//
poly16x4_t test_vmov_n_p16(poly16_t a) {
return vmov_n_p16(a);
}
-// CHECK-LABEL: @test_vmov_n_f16(
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: ret <4 x half> [[VECINIT3]]
+// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: ret <4 x half> [[VECINIT3]]
+//
float16x4_t test_vmov_n_f16(float16_t *a) {
return vmov_n_f16(*a);
}
-// CHECK-LABEL: @test_vmov_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
-// CHECK: ret <2 x float> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[A]], i32 1
+// CHECK-NEXT: ret <2 x float> [[VECINIT1_I]]
+//
float32x2_t test_vmov_n_f32(float32_t a) {
return vmov_n_f32(a);
}
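// vmov_n_* behaves identically to vdup_n_*: both splat the scalar into
// every lane, which is why the checks above are pure insertelement
// chains with no intrinsic call.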
-// CHECK-LABEL: @test_vmovq_n_u8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
uint8x16_t test_vmovq_n_u8(uint8_t a) {
return vmovq_n_u8(a);
}
-// CHECK-LABEL: @test_vmovq_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
uint16x8_t test_vmovq_n_u16(uint16_t a) {
return vmovq_n_u16(a);
}
-// CHECK-LABEL: @test_vmovq_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
+//
uint32x4_t test_vmovq_n_u32(uint32_t a) {
return vmovq_n_u32(a);
}
-// CHECK-LABEL: @test_vmovq_n_s8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
int8x16_t test_vmovq_n_s8(int8_t a) {
return vmovq_n_s8(a);
}
-// CHECK-LABEL: @test_vmovq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
int16x8_t test_vmovq_n_s16(int16_t a) {
return vmovq_n_s16(a);
}
-// CHECK-LABEL: @test_vmovq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
+//
int32x4_t test_vmovq_n_s32(int32_t a) {
return vmovq_n_s32(a);
}
-// CHECK-LABEL: @test_vmovq_n_p8(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
-// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
-// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
-// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
-// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
-// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
-// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
-// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
-// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VECINIT15_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[A]], i32 7
+// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[A]], i32 8
+// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[A]], i32 9
+// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[A]], i32 10
+// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[A]], i32 11
+// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[A]], i32 12
+// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[A]], i32 13
+// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[A]], i32 14
+// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VECINIT15_I]]
+//
poly8x16_t test_vmovq_n_p8(poly8_t a) {
return vmovq_n_p8(a);
}
-// CHECK-LABEL: @test_vmovq_n_p16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VECINIT7_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[A]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[A]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[A]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[A]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VECINIT7_I]]
+//
poly16x8_t test_vmovq_n_p16(poly16_t a) {
return vmovq_n_p16(a);
}
-// CHECK-LABEL: @test_vmovq_n_f16(
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
-// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
-// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
-// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
-// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
-// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
-// CHECK: ret <8 x half> [[VECINIT7]]
+// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP0]], i32 0
+// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
+// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
+// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
+// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
+// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x half> [[VECINIT7]]
+//
float16x8_t test_vmovq_n_f16(float16_t *a) {
return vmovq_n_f16(*a);
}
-// CHECK-LABEL: @test_vmovq_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
-// CHECK: ret <4 x float> [[VECINIT3_I]]
+// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[A]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[A]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[A]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VECINIT3_I]]
+//
float32x4_t test_vmovq_n_f32(float32_t a) {
return vmovq_n_f32(a);
}
-// CHECK-LABEL: @test_vmov_n_s64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
int64x1_t test_vmov_n_s64(int64_t a) {
int64x1_t tmp = vmov_n_s64(a);
return vadd_s64(tmp, tmp);
}
-// CHECK-LABEL: @test_vmov_n_u64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
-// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
-// CHECK: ret <1 x i64> [[ADD_I]]
+// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
+// CHECK-NEXT: ret <1 x i64> [[ADD_I]]
+//
uint64x1_t test_vmov_n_u64(uint64_t a) {
uint64x1_t tmp = vmov_n_u64(a);
return vadd_u64(tmp, tmp);
}
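// For the 64-bit single-lane splats the tests add tmp to itself, so the
// checks show an add of the <1 x i64> splat rather than a bare
// insertelement; vmov_n_s64/u64 on their own are just the single
// insertelement into lane 0.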
-// CHECK-LABEL: @test_vmovq_n_s64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VECINIT1_I]]
+//
int64x2_t test_vmovq_n_s64(int64_t a) {
return vmovq_n_s64(a);
}
-// CHECK-LABEL: @test_vmovq_n_u64(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VECINIT1_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VECINIT1_I]]
+//
uint64x2_t test_vmovq_n_u64(uint64_t a) {
return vmovq_n_u64(a);
}
-// CHECK-LABEL: @test_vmul_s8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[MUL_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmul_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[MUL_I]]
+//
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
return vmul_s8(a, b);
}
-// CHECK-LABEL: @test_vmul_s16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
return vmul_s16(a, b);
}
-// CHECK-LABEL: @test_vmul_s32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
return vmul_s32(a, b);
}
-// CHECK-LABEL: @test_vmul_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
-// CHECK: ret <2 x float> [[MUL_I]]
+// CHECK-LABEL: define <2 x float> @test_vmul_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x float> [[MUL_I]]
+//
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
return vmul_f32(a, b);
}
-// CHECK-LABEL: @test_vmul_u8(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[MUL_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmul_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[MUL_I]]
+//
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
return vmul_u8(a, b);
}
-// CHECK-LABEL: @test_vmul_u16(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
return vmul_u16(a, b);
}
-// CHECK-LABEL: @test_vmul_u32(
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
return vmul_u32(a, b);
}
-// CHECK-LABEL: @test_vmulq_s8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[MUL_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[MUL_I]]
+//
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
return vmulq_s8(a, b);
}
-// CHECK-LABEL: @test_vmulq_s16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
return vmulq_s16(a, b);
}
-// CHECK-LABEL: @test_vmulq_s32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
return vmulq_s32(a, b);
}
-// CHECK-LABEL: @test_vmulq_f32(
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
-// CHECK: ret <4 x float> [[MUL_I]]
+// CHECK-LABEL: define <4 x float> @test_vmulq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x float> [[MUL_I]]
+//
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
return vmulq_f32(a, b);
}
-// CHECK-LABEL: @test_vmulq_u8(
-// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[MUL_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[MUL_I]]
+//
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
return vmulq_u8(a, b);
}
-// CHECK-LABEL: @test_vmulq_u16(
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
return vmulq_u16(a, b);
}
-// CHECK-LABEL: @test_vmulq_u32(
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
return vmulq_u32(a, b);
}
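
The plain vmul*/vmulq* intrinsics above lower to a bare IR mul (fmul for float vectors) on the operands, with no bitcasts involved; the delta in these hunks is only the switch to the stricter auto-generated check format. A minimal sketch of the source-level semantics (assuming <arm_neon.h> on AArch64; illustrative only):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16x4_t a = {1, 2, 3, 4};
    int16x4_t b = {5, 6, 7, 8};
    int16x4_t r = vmul_s16(a, b); /* element-wise: {5, 12, 21, 32} */
    printf("%d %d %d %d\n", vget_lane_s16(r, 0), vget_lane_s16(r, 1),
           vget_lane_s16(r, 2), vget_lane_s16(r, 3));
    return 0;
}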
-// CHECK-LABEL: @test_vmull_s8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmull_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
return vmull_s8(a, b);
}
-// CHECK-LABEL: @test_vmull_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
return vmull_s16(a, b);
}
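
This hunk shows the substantive change for the widening multiplies: the regenerated checks now pin down the bitcasts back from <8 x i8> ([[VMULL_I]]/[[VMULL1_I]]) that feed the intrinsic operands, where the old checks matched %a/%b directly. The source-level semantics are unchanged: a signed i16 x i16 -> i32 widening multiply. A usage sketch (assuming <arm_neon.h>; not part of the patch):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16x4_t a = {300, -300, 1000, 2000};
    int16x4_t b = {300, 300, 1000, 2000};
    int32x4_t r = vmull_s16(a, b); /* widens, so no 16-bit overflow */
    printf("%d %d %d %d\n", vgetq_lane_s32(r, 0), vgetq_lane_s32(r, 1),
           vgetq_lane_s32(r, 2), vgetq_lane_s32(r, 3));
    /* prints: 90000 -90000 1000000 4000000 */
    return 0;
}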
-// CHECK-LABEL: @test_vmull_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
return vmull_s32(a, b);
}
-// CHECK-LABEL: @test_vmull_u8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmull_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
return vmull_u8(a, b);
}
-// CHECK-LABEL: @test_vmull_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
return vmull_u16(a, b);
}
-// CHECK-LABEL: @test_vmull_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
return vmull_u32(a, b);
}
-// CHECK-LABEL: @test_vmull_p8(
-// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i16> [[VMULL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmull_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VMULL_I]]
+//
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
return vmull_p8(a, b);
}
-// CHECK-LABEL: @test_vmull_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
return vmull_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vmull_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
return vmull_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vmull_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
-// CHECK: ret <4 x i32> [[VMULL2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I]]
+//
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
return vmull_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vmull_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
-// CHECK: ret <2 x i64> [[VMULL2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I]]
+//
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
return vmull_lane_u32(a, b, 1);
}
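
The vmull_lane_* forms first broadcast the selected lane (the shufflevector with a constant mask above) and then perform the same widening multiply. A hypothetical usage sketch (assuming <arm_neon.h>; illustrative):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16x4_t a = {100, 200, 300, 400};
    int16x4_t b = {0, 0, 0, 1000};
    /* every lane of a is multiplied by b[3]; results widen to i32 */
    int32x4_t r = vmull_lane_s16(a, b, 3);
    printf("%d %d %d %d\n", vgetq_lane_s32(r, 0), vgetq_lane_s32(r, 1),
           vgetq_lane_s32(r, 2), vgetq_lane_s32(r, 3));
    /* prints: 100000 200000 300000 400000 */
    return 0;
}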
-// CHECK-LABEL: @test_vmull_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
-// CHECK: ret <4 x i32> [[VMULL5_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
+//
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
return vmull_n_s16(a, b);
}
-// CHECK-LABEL: @test_vmull_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
-// CHECK: ret <2 x i64> [[VMULL3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
+//
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
return vmull_n_s32(a, b);
}
-// CHECK-LABEL: @test_vmull_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
-// CHECK: ret <4 x i32> [[VMULL5_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]]
+//
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
return vmull_n_u16(a, b);
}
-// CHECK-LABEL: @test_vmull_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
-// CHECK: ret <2 x i64> [[VMULL3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]]
+//
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
return vmull_n_u32(a, b);
}
-// CHECK-LABEL: @test_vmul_p8(
-// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VMUL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmul_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VMUL_V_I]]
+//
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
return vmul_p8(a, b);
}
-// CHECK-LABEL: @test_vmulq_p8(
-// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VMULQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VMULQ_V_I]]
+//
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
return vmulq_p8(a, b);
}
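
vmul_p8/vmulq_p8 are polynomial (carry-less, GF(2)) multiplies and stay on the llvm.arm.neon.vmulp intrinsic rather than a plain mul. A small sketch of the semantics (assuming <arm_neon.h>; illustrative only):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    poly8x8_t a = {3, 0, 0, 0, 0, 0, 0, 0};
    poly8x8_t b = {3, 0, 0, 0, 0, 0, 0, 0};
    /* carry-less per-lane multiply: 0b11 * 0b11 = 0b101 = 5, not 9 */
    poly8x8_t r = vmul_p8(a, b);
    printf("%u\n", vget_lane_p8(r, 0)); /* 5 */
    return 0;
}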
-// CHECK-LABEL: @test_vmul_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
-// CHECK: ret <4 x i16> [[MUL]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A]], [[LANE]]
+// CHECK-NEXT: ret <4 x i16> [[MUL]]
+//
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
return vmul_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vmul_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
-// CHECK: ret <2 x i32> [[MUL]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A]], [[LANE]]
+// CHECK-NEXT: ret <2 x i32> [[MUL]]
+//
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
return vmul_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vmul_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
-// CHECK: ret <2 x float> [[MUL]]
+// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A]], [[LANE]]
+// CHECK-NEXT: ret <2 x float> [[MUL]]
+//
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
return vmul_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vmul_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
-// CHECK: ret <4 x i16> [[MUL]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A]], [[LANE]]
+// CHECK-NEXT: ret <4 x i16> [[MUL]]
+//
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
return vmul_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vmul_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
-// CHECK: ret <2 x i32> [[MUL]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A]], [[LANE]]
+// CHECK-NEXT: ret <2 x i32> [[MUL]]
+//
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
return vmul_lane_u32(a, b, 1);
}
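
Note the extra leading bitcast in test_vmul_lane_f32 above: float vectors are now reinterpreted through an integer vector of the same width (<2 x float> -> <2 x i32> -> <8 x i8>) before the round-trip, matching the bitcast-based lowering this patch introduces for floating-point element types; the integer lane variants are unchanged. Source-level behaviour is the same. Sketch (assuming <arm_neon.h>; illustrative):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float32x2_t a = {1.0f, 2.0f};
    float32x2_t b = {10.0f, 0.5f};
    float32x2_t r = vmul_lane_f32(a, b, 1); /* each lane of a times b[1] */
    printf("%g %g\n", vget_lane_f32(r, 0), vget_lane_f32(r, 1)); /* 0.5 1 */
    return 0;
}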
-// CHECK-LABEL: @test_vmulq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
-// CHECK: ret <8 x i16> [[MUL]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A]], [[LANE]]
+// CHECK-NEXT: ret <8 x i16> [[MUL]]
+//
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
return vmulq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vmulq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
-// CHECK: ret <4 x i32> [[MUL]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A]], [[LANE]]
+// CHECK-NEXT: ret <4 x i32> [[MUL]]
+//
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
return vmulq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vmulq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
-// CHECK: ret <4 x float> [[MUL]]
+// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A]], [[LANE]]
+// CHECK-NEXT: ret <4 x float> [[MUL]]
+//
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
return vmulq_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vmulq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
-// CHECK: ret <8 x i16> [[MUL]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A]], [[LANE]]
+// CHECK-NEXT: ret <8 x i16> [[MUL]]
+//
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
return vmulq_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vmulq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
-// CHECK: ret <4 x i32> [[MUL]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A]], [[LANE]]
+// CHECK-NEXT: ret <4 x i32> [[MUL]]
+//
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
return vmulq_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vmul_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
return vmul_n_s16(a, b);
}
-// CHECK-LABEL: @test_vmul_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A]], [[VECINIT1_I]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
return vmul_n_s32(a, b);
}
-// CHECK-LABEL: @test_vmul_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
-// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
-// CHECK: ret <2 x float> [[MUL_I]]
+// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[B]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[A]], [[VECINIT1_I]]
+// CHECK-NEXT: ret <2 x float> [[MUL_I]]
+//
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
return vmul_n_f32(a, b);
}
-// CHECK-LABEL: @test_vmul_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
-// CHECK: ret <4 x i16> [[MUL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT: ret <4 x i16> [[MUL_I]]
+//
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
return vmul_n_u16(a, b);
}
-// CHECK-LABEL: @test_vmul_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
-// CHECK: ret <2 x i32> [[MUL_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A]], [[VECINIT1_I]]
+// CHECK-NEXT: ret <2 x i32> [[MUL_I]]
+//
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
return vmul_n_u32(a, b);
}
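
The non-widening _n variants splat the scalar with an insertelement chain and then emit a plain mul/fmul; no intrinsic call is needed. Sketch (assuming <arm_neon.h>; illustrative, not part of the patch):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16x4_t a = {1, 2, 3, 4};
    int16x4_t r = vmul_n_s16(a, 10); /* scalar is splatted, then mul */
    printf("%d %d %d %d\n", vget_lane_s16(r, 0), vget_lane_s16(r, 1),
           vget_lane_s16(r, 2), vget_lane_s16(r, 3)); /* 10 20 30 40 */
    return 0;
}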
-// CHECK-LABEL: @test_vmulq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A]], [[VECINIT7_I]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
return vmulq_n_s16(a, b);
}
-// CHECK-LABEL: @test_vmulq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
return vmulq_n_s32(a, b);
}
-// CHECK-LABEL: @test_vmulq_n_f32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
-// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
-// CHECK: ret <4 x float> [[MUL_I]]
+// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[B]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT: ret <4 x float> [[MUL_I]]
+//
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
return vmulq_n_f32(a, b);
}
-// CHECK-LABEL: @test_vmulq_n_u16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
-// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
-// CHECK: ret <8 x i16> [[MUL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A]], [[VECINIT7_I]]
+// CHECK-NEXT: ret <8 x i16> [[MUL_I]]
+//
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
return vmulq_n_u16(a, b);
}
-// CHECK-LABEL: @test_vmulq_n_u32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
-// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
-// CHECK: ret <4 x i32> [[MUL_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
+// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A]], [[VECINIT3_I]]
+// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
+//
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
return vmulq_n_u32(a, b);
}
-// CHECK-LABEL: @test_vmvn_s8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
int8x8_t test_vmvn_s8(int8x8_t a) {
return vmvn_s8(a);
}
-// CHECK-LABEL: @test_vmvn_s16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
-// CHECK: ret <4 x i16> [[NEG_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <4 x i16> [[NOT_I]]
+//
int16x4_t test_vmvn_s16(int16x4_t a) {
return vmvn_s16(a);
}
-// CHECK-LABEL: @test_vmvn_s32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
-// CHECK: ret <2 x i32> [[NEG_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <2 x i32> [[NOT_I]]
+//
int32x2_t test_vmvn_s32(int32x2_t a) {
return vmvn_s32(a);
}
-// CHECK-LABEL: @test_vmvn_u8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
uint8x8_t test_vmvn_u8(uint8x8_t a) {
return vmvn_u8(a);
}
-// CHECK-LABEL: @test_vmvn_u16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
-// CHECK: ret <4 x i16> [[NEG_I]]
+// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <4 x i16> [[NOT_I]]
+//
uint16x4_t test_vmvn_u16(uint16x4_t a) {
return vmvn_u16(a);
}
-// CHECK-LABEL: @test_vmvn_u32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
-// CHECK: ret <2 x i32> [[NEG_I]]
+// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <2 x i32> [[NOT_I]]
+//
uint32x2_t test_vmvn_u32(uint32x2_t a) {
return vmvn_u32(a);
}
-// CHECK-LABEL: @test_vmvn_p8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
-// CHECK: ret <8 x i8> [[NEG_I]]
+// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <8 x i8> [[NOT_I]]
+//
poly8x8_t test_vmvn_p8(poly8x8_t a) {
return vmvn_p8(a);
}
-// CHECK-LABEL: @test_vmvnq_s8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
int8x16_t test_vmvnq_s8(int8x16_t a) {
return vmvnq_s8(a);
}
-// CHECK-LABEL: @test_vmvnq_s16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
-// CHECK: ret <8 x i16> [[NEG_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <8 x i16> [[NOT_I]]
+//
int16x8_t test_vmvnq_s16(int16x8_t a) {
return vmvnq_s16(a);
}
-// CHECK-LABEL: @test_vmvnq_s32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
-// CHECK: ret <4 x i32> [[NEG_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <4 x i32> [[NOT_I]]
+//
int32x4_t test_vmvnq_s32(int32x4_t a) {
return vmvnq_s32(a);
}
-// CHECK-LABEL: @test_vmvnq_u8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
return vmvnq_u8(a);
}
-// CHECK-LABEL: @test_vmvnq_u16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
-// CHECK: ret <8 x i16> [[NEG_I]]
+// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT: ret <8 x i16> [[NOT_I]]
+//
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
return vmvnq_u16(a);
}
-// CHECK-LABEL: @test_vmvnq_u32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
-// CHECK: ret <4 x i32> [[NEG_I]]
+// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
+// CHECK-NEXT: ret <4 x i32> [[NOT_I]]
+//
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
return vmvnq_u32(a);
}
-// CHECK-LABEL: @test_vmvnq_p8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
-// CHECK: ret <16 x i8> [[NEG_I]]
+// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[A]], splat (i8 -1)
+// CHECK-NEXT: ret <16 x i8> [[NOT_I]]
+//
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
return vmvnq_p8(a);
}
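
vmvn* is a plain bitwise NOT, emitted as xor with an all-ones splat; the only delta in the regenerated checks is the value-name change from [[NEG_I]] to [[NOT_I]]. Sketch (assuming <arm_neon.h>; illustrative):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8x8_t a = {0x00, 0xFF, 0x0F, 0xF0, 1, 2, 3, 4};
    uint8x8_t r = vmvn_u8(a); /* xor with splat(0xff): per-lane NOT */
    printf("%02x %02x\n", vget_lane_u8(r, 0), vget_lane_u8(r, 1)); /* ff 00 */
    return 0;
}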
-// CHECK-LABEL: @test_vneg_s8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define <8 x i8> @test_vneg_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vneg_s8(int8x8_t a) {
return vneg_s8(a);
}
-// CHECK-LABEL: @test_vneg_s16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vneg_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vneg_s16(int16x4_t a) {
return vneg_s16(a);
}
-// CHECK-LABEL: @test_vneg_s32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vneg_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vneg_s32(int32x2_t a) {
return vneg_s32(a);
}
-// CHECK-LABEL: @test_vneg_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %a
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define <2 x float> @test_vneg_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[A]]
+// CHECK-NEXT: ret <2 x float> [[FNEG_I]]
+//
float32x2_t test_vneg_f32(float32x2_t a) {
return vneg_f32(a);
}
-// CHECK-LABEL: @test_vnegq_s8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vnegq_s8(int8x16_t a) {
return vnegq_s8(a);
}
-// CHECK-LABEL: @test_vnegq_s16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vnegq_s16(int16x8_t a) {
return vnegq_s16(a);
}
-// CHECK-LABEL: @test_vnegq_s32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vnegq_s32(int32x4_t a) {
return vnegq_s32(a);
}
-// CHECK-LABEL: @test_vnegq_f32(
-// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %a
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define <4 x float> @test_vnegq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[A]]
+// CHECK-NEXT: ret <4 x float> [[FNEG_I]]
+//
float32x4_t test_vnegq_f32(float32x4_t a) {
return vnegq_f32(a);
}
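
vneg* lowers to a subtraction from zero for the integer variants and to fneg for the float ones (renamed [[FNEG_I]] in the new checks). Sketch (assuming <arm_neon.h>; illustrative):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float32x2_t a = {1.5f, -2.0f};
    float32x2_t r = vneg_f32(a); /* IR: fneg <2 x float> */
    printf("%g %g\n", vget_lane_f32(r, 0), vget_lane_f32(r, 1)); /* -1.5 2 */
    return 0;
}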
-// CHECK-LABEL: @test_vorn_s8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
-// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
-// CHECK: ret <8 x i8> [[OR_I]]
+// CHECK-LABEL: define <8 x i8> @test_vorn_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i8> [[OR_I]]
+//
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
return vorn_s8(a, b);
}
-// CHECK-LABEL: @test_vorn_s16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
-// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
-// CHECK: ret <4 x i16> [[OR_I]]
+// CHECK-LABEL: define <4 x i16> @test_vorn_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i16> [[OR_I]]
+//
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
return vorn_s16(a, b);
}
-// CHECK-LABEL: @test_vorn_s32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
-// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
-// CHECK: ret <2 x i32> [[OR_I]]
+// CHECK-LABEL: define <2 x i32> @test_vorn_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i32> [[OR_I]]
+//
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
return vorn_s32(a, b);
}
-// CHECK-LABEL: @test_vorn_s64(
-// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
-// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
-// CHECK: ret <1 x i64> [[OR_I]]
+// CHECK-LABEL: define <1 x i64> @test_vorn_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <1 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <1 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <1 x i64> [[OR_I]]
+//
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
return vorn_s64(a, b);
}
-// CHECK-LABEL: @test_vorn_u8(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
-// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
-// CHECK: ret <8 x i8> [[OR_I]]
+// CHECK-LABEL: define <8 x i8> @test_vorn_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i8> [[OR_I]]
+//
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
return vorn_u8(a, b);
}
-// CHECK-LABEL: @test_vorn_u16(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
-// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
-// CHECK: ret <4 x i16> [[OR_I]]
+// CHECK-LABEL: define <4 x i16> @test_vorn_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i16> [[OR_I]]
+//
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
return vorn_u16(a, b);
}
-// CHECK-LABEL: @test_vorn_u32(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
-// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
-// CHECK: ret <2 x i32> [[OR_I]]
+// CHECK-LABEL: define <2 x i32> @test_vorn_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i32> [[OR_I]]
+//
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
return vorn_u32(a, b);
}
-// CHECK-LABEL: @test_vorn_u64(
-// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
-// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
-// CHECK: ret <1 x i64> [[OR_I]]
+// CHECK-LABEL: define <1 x i64> @test_vorn_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <1 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <1 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <1 x i64> [[OR_I]]
+//
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
return vorn_u64(a, b);
}
-// CHECK-LABEL: @test_vornq_s8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
-// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
-// CHECK: ret <16 x i8> [[OR_I]]
+// CHECK-LABEL: define <16 x i8> @test_vornq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <16 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <16 x i8> [[OR_I]]
+//
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
return vornq_s8(a, b);
}
-// CHECK-LABEL: @test_vornq_s16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
-// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
-// CHECK: ret <8 x i16> [[OR_I]]
+// CHECK-LABEL: define <8 x i16> @test_vornq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i16> [[OR_I]]
+//
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
return vornq_s16(a, b);
}
-// CHECK-LABEL: @test_vornq_s32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
-// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
-// CHECK: ret <4 x i32> [[OR_I]]
+// CHECK-LABEL: define <4 x i32> @test_vornq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i32> [[OR_I]]
+//
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
return vornq_s32(a, b);
}
-// CHECK-LABEL: @test_vornq_s64(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
-// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
-// CHECK: ret <2 x i64> [[OR_I]]
+// CHECK-LABEL: define <2 x i64> @test_vornq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i64> [[OR_I]]
+//
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
return vornq_s64(a, b);
}
-// CHECK-LABEL: @test_vornq_u8(
-// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
-// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
-// CHECK: ret <16 x i8> [[OR_I]]
+// CHECK-LABEL: define <16 x i8> @test_vornq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[B]], splat (i8 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <16 x i8> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <16 x i8> [[OR_I]]
+//
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
return vornq_u8(a, b);
}
-// CHECK-LABEL: @test_vornq_u16(
-// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
-// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
-// CHECK: ret <8 x i16> [[OR_I]]
+// CHECK-LABEL: define <8 x i16> @test_vornq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i16> [[B]], splat (i16 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i16> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <8 x i16> [[OR_I]]
+//
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
return vornq_u16(a, b);
}
-// CHECK-LABEL: @test_vornq_u32(
-// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
-// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
-// CHECK: ret <4 x i32> [[OR_I]]
+// CHECK-LABEL: define <4 x i32> @test_vornq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B]], splat (i32 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <4 x i32> [[OR_I]]
+//
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
return vornq_u32(a, b);
}
-// CHECK-LABEL: @test_vornq_u64(
-// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
-// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
-// CHECK: ret <2 x i64> [[OR_I]]
+// CHECK-LABEL: define <2 x i64> @test_vornq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[NOT_I:%.*]] = xor <2 x i64> [[B]], splat (i64 -1)
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i64> [[A]], [[NOT_I]]
+// CHECK-NEXT: ret <2 x i64> [[OR_I]]
+//
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
return vornq_u64(a, b);
}
-// CHECK-LABEL: @test_vorr_s8(
-// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[OR_I]]
+// CHECK-LABEL: define <8 x i8> @test_vorr_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[OR_I]]
+//
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
return vorr_s8(a, b);
}
-// CHECK-LABEL: @test_vorr_s16(
-// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[OR_I]]
+// CHECK-LABEL: define <4 x i16> @test_vorr_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[OR_I]]
+//
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
return vorr_s16(a, b);
}
-// CHECK-LABEL: @test_vorr_s32(
-// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[OR_I]]
+// CHECK-LABEL: define <2 x i32> @test_vorr_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[OR_I]]
+//
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
return vorr_s32(a, b);
}
-// CHECK-LABEL: @test_vorr_s64(
-// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[OR_I]]
+// CHECK-LABEL: define <1 x i64> @test_vorr_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[OR_I]]
+//
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
return vorr_s64(a, b);
}
-// CHECK-LABEL: @test_vorr_u8(
-// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[OR_I]]
+// CHECK-LABEL: define <8 x i8> @test_vorr_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[OR_I]]
+//
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
return vorr_u8(a, b);
}
-// CHECK-LABEL: @test_vorr_u16(
-// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[OR_I]]
+// CHECK-LABEL: define <4 x i16> @test_vorr_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[OR_I]]
+//
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
return vorr_u16(a, b);
}
-// CHECK-LABEL: @test_vorr_u32(
-// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[OR_I]]
+// CHECK-LABEL: define <2 x i32> @test_vorr_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[OR_I]]
+//
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
return vorr_u32(a, b);
}
-// CHECK-LABEL: @test_vorr_u64(
-// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[OR_I]]
+// CHECK-LABEL: define <1 x i64> @test_vorr_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[OR_I]]
+//
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
return vorr_u64(a, b);
}
-// CHECK-LABEL: @test_vorrq_s8(
-// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[OR_I]]
+// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[OR_I]]
+//
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
return vorrq_s8(a, b);
}
-// CHECK-LABEL: @test_vorrq_s16(
-// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[OR_I]]
+// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[OR_I]]
+//
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
return vorrq_s16(a, b);
}
-// CHECK-LABEL: @test_vorrq_s32(
-// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[OR_I]]
+// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[OR_I]]
+//
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
return vorrq_s32(a, b);
}
-// CHECK-LABEL: @test_vorrq_s64(
-// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[OR_I]]
+// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[OR_I]]
+//
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
return vorrq_s64(a, b);
}
-// CHECK-LABEL: @test_vorrq_u8(
-// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[OR_I]]
+// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[OR_I]]
+//
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
return vorrq_u8(a, b);
}
-// CHECK-LABEL: @test_vorrq_u16(
-// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[OR_I]]
+// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[OR_I]]
+//
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
return vorrq_u16(a, b);
}
-// CHECK-LABEL: @test_vorrq_u32(
-// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[OR_I]]
+// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[OR_I]]
+//
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
return vorrq_u32(a, b);
}
-// CHECK-LABEL: @test_vorrq_u64(
-// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[OR_I]]
+// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OR_I:%.*]] = or <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[OR_I]]
+//
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
return vorrq_u64(a, b);
}
-// CHECK-LABEL: @test_vpadal_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
-// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i16> [[VPADAL_V1_I]]
+//
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
return vpadal_s8(a, b);
}
-// CHECK-LABEL: @test_vpadal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
-// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADAL_V2_I]]
+//
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
return vpadal_s16(a, b);
}
-// CHECK-LABEL: @test_vpadal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
-// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADAL_V2_I]]
+//
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
return vpadal_s32(a, b);
}
-// CHECK-LABEL: @test_vpadal_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
-// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <4 x i16> [[VPADAL_V1_I]]
+//
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
return vpadal_u8(a, b);
}
-// CHECK-LABEL: @test_vpadal_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
-// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADAL_V2_I]]
+//
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
return vpadal_u16(a, b);
}
-// CHECK-LABEL: @test_vpadal_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
-// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADAL_V2_I]]
+//
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
return vpadal_u32(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
-// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VPADALQ_V1_I]]
+//
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
return vpadalq_s8(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
-// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADALQ_V2_I]]
+//
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
return vpadalq_s16(a, b);
}
-// CHECK-LABEL: @test_vpadalq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
-// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADALQ_V2_I]]
+//
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
return vpadalq_s32(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
-// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i16> [[VPADALQ_V1_I]]
+//
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
return vpadalq_u8(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
-// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADALQ_V2_I]]
+//
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
return vpadalq_u16(a, b);
}
-// CHECK-LABEL: @test_vpadalq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
-// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADALQ_V2_I]]
+//
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
return vpadalq_u32(a, b);
}
-// CHECK-LABEL: @test_vpadd_s8(
-// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPADD_V_I]]
+//
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
return vpadd_s8(a, b);
}
-// CHECK-LABEL: @test_vpadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
return vpadd_s16(a, b);
}
-// CHECK-LABEL: @test_vpadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
return vpadd_s32(a, b);
}
-// CHECK-LABEL: @test_vpadd_u8(
-// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPADD_V_I]]
+//
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
return vpadd_u8(a, b);
}
-// CHECK-LABEL: @test_vpadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
return vpadd_u16(a, b);
}
-// CHECK-LABEL: @test_vpadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
return vpadd_u32(a, b);
}
-// CHECK-LABEL: @test_vpadd_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VPADD_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vpadd_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]])
+// CHECK-NEXT: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
return vpadd_f32(a, b);
}
-// CHECK-LABEL: @test_vpaddl_s8(
-// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
-// CHECK: ret <4 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i16> [[VPADDL_I]]
+//
int16x4_t test_vpaddl_s8(int8x8_t a) {
return vpaddl_s8(a);
}
-// CHECK-LABEL: @test_vpaddl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
-// CHECK: ret <2 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADDL1_I]]
+//
int32x2_t test_vpaddl_s16(int16x4_t a) {
return vpaddl_s16(a);
}
-// CHECK-LABEL: @test_vpaddl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
-// CHECK: ret <1 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADDL1_I]]
+//
int64x1_t test_vpaddl_s32(int32x2_t a) {
return vpaddl_s32(a);
}
-// CHECK-LABEL: @test_vpaddl_u8(
-// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
-// CHECK: ret <4 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <4 x i16> [[VPADDL_I]]
+//
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
return vpaddl_u8(a);
}
-// CHECK-LABEL: @test_vpaddl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
-// CHECK: ret <2 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i32> [[VPADDL1_I]]
+//
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
return vpaddl_u16(a);
}
-// CHECK-LABEL: @test_vpaddl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
-// CHECK: ret <1 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <1 x i64> [[VPADDL1_I]]
+//
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
return vpaddl_u32(a);
}
-// CHECK-LABEL: @test_vpaddlq_s8(
-// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
-// CHECK: ret <8 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i16> [[VPADDL_I]]
+//
int16x8_t test_vpaddlq_s8(int8x16_t a) {
return vpaddlq_s8(a);
}
-// CHECK-LABEL: @test_vpaddlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
-// CHECK: ret <4 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADDL1_I]]
+//
int32x4_t test_vpaddlq_s16(int16x8_t a) {
return vpaddlq_s16(a);
}
-// CHECK-LABEL: @test_vpaddlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
-// CHECK: ret <2 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADDL1_I]]
+//
int64x2_t test_vpaddlq_s32(int32x4_t a) {
return vpaddlq_s32(a);
}
-// CHECK-LABEL: @test_vpaddlq_u8(
-// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
-// CHECK: ret <8 x i16> [[VPADDL_I]]
+// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i16> [[VPADDL_I]]
+//
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
return vpaddlq_u8(a);
}
-// CHECK-LABEL: @test_vpaddlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
-// CHECK: ret <4 x i32> [[VPADDL1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> [[VPADDL_I]])
+// CHECK-NEXT: ret <4 x i32> [[VPADDL1_I]]
+//
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
return vpaddlq_u16(a);
}
-// CHECK-LABEL: @test_vpaddlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
-// CHECK: ret <2 x i64> [[VPADDL1_I]]
+// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> [[VPADDL_I]])
+// CHECK-NEXT: ret <2 x i64> [[VPADDL1_I]]
+//
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
return vpaddlq_u32(a);
}
-// CHECK-LABEL: @test_vpmax_s8(
-// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMAX_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMAX_V_I]]
+//
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
return vpmax_s8(a, b);
}
-// CHECK-LABEL: @test_vpmax_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]])
+// CHECK-NEXT: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
return vpmax_s16(a, b);
}
-// CHECK-LABEL: @test_vpmax_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]])
+// CHECK-NEXT: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
return vpmax_s32(a, b);
}
-// CHECK-LABEL: @test_vpmax_u8(
-// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMAX_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMAX_V_I]]
+//
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
return vpmax_u8(a, b);
}
-// CHECK-LABEL: @test_vpmax_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]])
+// CHECK-NEXT: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
return vpmax_u16(a, b);
}
-// CHECK-LABEL: @test_vpmax_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]])
+// CHECK-NEXT: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
return vpmax_u32(a, b);
}
-// CHECK-LABEL: @test_vpmax_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VPMAX_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vpmax_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> [[VPMAX_V_I]], <2 x float> [[VPMAX_V1_I]])
+// CHECK-NEXT: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
return vpmax_f32(a, b);
}
-// CHECK-LABEL: @test_vpmin_s8(
-// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMIN_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMIN_V_I]]
+//
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
return vpmin_s8(a, b);
}
-// CHECK-LABEL: @test_vpmin_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]])
+// CHECK-NEXT: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
return vpmin_s16(a, b);
}
-// CHECK-LABEL: @test_vpmin_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]])
+// CHECK-NEXT: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
return vpmin_s32(a, b);
}
-// CHECK-LABEL: @test_vpmin_u8(
-// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VPMIN_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VPMIN_V_I]]
+//
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
return vpmin_u8(a, b);
}
-// CHECK-LABEL: @test_vpmin_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]])
+// CHECK-NEXT: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
return vpmin_u16(a, b);
}
-// CHECK-LABEL: @test_vpmin_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]])
+// CHECK-NEXT: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
return vpmin_u32(a, b);
}
-// CHECK-LABEL: @test_vpmin_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VPMIN_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vpmin_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> [[VPMIN_V_I]], <2 x float> [[VPMIN_V1_I]])
+// CHECK-NEXT: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
return vpmin_f32(a, b);
}
-// CHECK-LABEL: @test_vqabs_s8(
-// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VQABS_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VQABS_V_I]]
+//
int8x8_t test_vqabs_s8(int8x8_t a) {
return vqabs_s8(a);
}
-// CHECK-LABEL: @test_vqabs_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
-// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQABS_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> [[VQABS_V_I]])
+// CHECK-NEXT: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqabs_s16(int16x4_t a) {
return vqabs_s16(a);
}
-// CHECK-LABEL: @test_vqabs_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
-// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQABS_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> [[VQABS_V_I]])
+// CHECK-NEXT: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqabs_s32(int32x2_t a) {
return vqabs_s32(a);
}
-// CHECK-LABEL: @test_vqabsq_s8(
-// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VQABSQ_V_I]]
+//
int8x16_t test_vqabsq_s8(int8x16_t a) {
return vqabsq_s8(a);
}
-// CHECK-LABEL: @test_vqabsq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
-// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQABSQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> [[VQABSQ_V_I]])
+// CHECK-NEXT: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vqabsq_s16(int16x8_t a) {
return vqabsq_s16(a);
}
-// CHECK-LABEL: @test_vqabsq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
-// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQABSQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> [[VQABSQ_V_I]])
+// CHECK-NEXT: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vqabsq_s32(int32x4_t a) {
return vqabsq_s32(a);
}
-// CHECK-LABEL: @test_vqadd_s8(
-// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQADD_V_I]]
+//
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
return vqadd_s8(a, b);
}
-// CHECK-LABEL: @test_vqadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
return vqadd_s16(a, b);
}
-// CHECK-LABEL: @test_vqadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
return vqadd_s32(a, b);
}
-// CHECK-LABEL: @test_vqadd_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQADD_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
return vqadd_s64(a, b);
}
-// CHECK-LABEL: @test_vqadd_u8(
-// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQADD_V_I]]
+//
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
return vqadd_u8(a, b);
}
-// CHECK-LABEL: @test_vqadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
return vqadd_u16(a, b);
}
-// CHECK-LABEL: @test_vqadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
return vqadd_u32(a, b);
}
-// CHECK-LABEL: @test_vqadd_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQADD_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]])
+// CHECK-NEXT: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
return vqadd_u64(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s8(
-// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQADDQ_V_I]]
+//
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
return vqaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
return vqaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
return vqaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vqaddq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
return vqaddq_s64(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u8(
-// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQADDQ_V_I]]
+//
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
return vqaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
return vqaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
return vqaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vqaddq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]])
+// CHECK-NEXT: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
return vqaddq_u64(a, b);
}
-// CHECK-LABEL: @test_vqdmlal_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
+//
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlal_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
+//
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlal_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]]
+//
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vqdmlal_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]]
+//
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vqdmlal_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
-// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]]
+//
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlal_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlal_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
-// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]]
+//
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlal_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
+//
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
+//
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
-// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]]
+//
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_lane_s16(a, b, c, 3);
}
-// CHECK-LABEL: @test_vqdmlsl_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
-// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
-// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <2 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]]
+//
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_lane_s32(a, b, c, 1);
}
-// CHECK-LABEL: @test_vqdmlsl_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
-// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], i16 noundef signext [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]]
+//
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlsl_n_s16(a, b, c);
}
-// CHECK-LABEL: @test_vqdmlsl_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %c, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
-// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]])
+// CHECK-NEXT: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]])
+// CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]]
+//
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlsl_n_s32(a, b, c);
}
-// CHECK-LABEL: @test_vqdmulh_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
return vqdmulh_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulh_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
return vqdmulh_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqdmulhq_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqdmulhq_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmulh_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
return vqdmulh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqdmulh_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
-// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
return vqdmulh_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vqdmulhq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
return vqdmulhq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqdmulhq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
-// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
return vqdmulhq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vqdmulh_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQDMULH_V5_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULH_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I_I]], <4 x i16> [[VQDMULH_V1_I_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
return vqdmulh_n_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulh_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQDMULH_V3_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULH_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULH_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I_I]], <2 x i32> [[VQDMULH_V1_I_I]])
+// CHECK-NEXT: [[VQDMULH_V3_I_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
return vqdmulh_n_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
-// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQDMULHQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I_I]], <8 x i16> [[VQDMULHQ_V1_I_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqdmulhq_n_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmulhq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
-// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
+// CHECK-NEXT: [[VQDMULHQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQDMULHQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I_I]], <4 x i32> [[VQDMULHQ_V1_I_I]])
+// CHECK-NEXT: [[VQDMULHQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqdmulhq_n_s32(a, b);
}
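// The vqdmull tests below exercise the widening form, roughly (s16 case):
//   r[i] = sat32(2 * (int32_t)a[i] * (int32_t)b[i])
// Saturation can only trigger when both inputs are INT16_MIN, since
// 2 * INT16_MIN * INT16_MIN is the one product that overflows int32_t.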
-// CHECK-LABEL: @test_vqdmull_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
return vqdmull_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmull_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
return vqdmull_s32(a, b);
}
-// CHECK-LABEL: @test_vqdmull_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
return vqdmull_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqdmull_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
-// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
+//
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
return vqdmull_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vqdmull_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQDMULL_V5_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
return vqdmull_n_s16(a, b);
}
-// CHECK-LABEL: @test_vqdmull_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQDMULL_V3_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]])
+// CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
return vqdmull_n_s32(a, b);
}
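// The vqmovn/vqmovun tests below cover the saturating-narrow conversions:
// vqmovn clamps to the signed or unsigned range of the half-width type,
// and vqmovun narrows a signed source into the unsigned range, roughly
// vqmovun_s16: r[i] = (uint8_t)clamp(a[i], 0, 255). The <8 x i8> results
// are returned directly, with no trailing bitcast, because <8 x i8> is
// already the canonical 64-bit round-trip type.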
-// CHECK-LABEL: @test_vqmovn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> [[VQMOVN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVN_V1_I]]
+//
int8x8_t test_vqmovn_s16(int16x8_t a) {
return vqmovn_s16(a);
}
-// CHECK-LABEL: @test_vqmovn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqmovn_s32(int32x4_t a) {
return vqmovn_s32(a);
}
-// CHECK-LABEL: @test_vqmovn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqmovn_s64(int64x2_t a) {
return vqmovn_s64(a);
}
-// CHECK-LABEL: @test_vqmovn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> [[VQMOVN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVN_V1_I]]
+//
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
return vqmovn_u16(a);
}
-// CHECK-LABEL: @test_vqmovn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
return vqmovn_u32(a);
}
-// CHECK-LABEL: @test_vqmovn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> [[VQMOVN_V_I]])
+// CHECK-NEXT: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
return vqmovn_u64(a);
}
-// CHECK-LABEL: @test_vqmovun_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
-// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> [[VQMOVUN_V_I]])
+// CHECK-NEXT: ret <8 x i8> [[VQMOVUN_V1_I]]
+//
uint8x8_t test_vqmovun_s16(int16x8_t a) {
return vqmovun_s16(a);
}
-// CHECK-LABEL: @test_vqmovun_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
-// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> [[VQMOVUN_V_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vqmovun_s32(int32x4_t a) {
return vqmovun_s32(a);
}
-// CHECK-LABEL: @test_vqmovun_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
-// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> [[VQMOVUN_V_I]])
+// CHECK-NEXT: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
uint32x2_t test_vqmovun_s64(int64x2_t a) {
return vqmovun_s64(a);
}
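// The vqneg tests below check saturating negation: the same as plain
// negation except that the minimum value saturates to the maximum
// (e.g. INT8_MIN maps to INT8_MAX instead of wrapping back to itself).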
-// CHECK-LABEL: @test_vqneg_s8(
-// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
-// CHECK: ret <8 x i8> [[VQNEG_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT: ret <8 x i8> [[VQNEG_V_I]]
+//
int8x8_t test_vqneg_s8(int8x8_t a) {
return vqneg_s8(a);
}
-// CHECK-LABEL: @test_vqneg_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
-// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQNEG_V1_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> [[VQNEG_V_I]])
+// CHECK-NEXT: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vqneg_s16(int16x4_t a) {
return vqneg_s16(a);
}
-// CHECK-LABEL: @test_vqneg_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
-// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQNEG_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> [[VQNEG_V_I]])
+// CHECK-NEXT: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
int32x2_t test_vqneg_s32(int32x2_t a) {
return vqneg_s32(a);
}
-// CHECK-LABEL: @test_vqnegq_s8(
-// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
-// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT: ret <16 x i8> [[VQNEGQ_V_I]]
+//
int8x16_t test_vqnegq_s8(int8x16_t a) {
return vqnegq_s8(a);
}
-// CHECK-LABEL: @test_vqnegq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
-// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]])
+// CHECK-NEXT: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vqnegq_s16(int16x8_t a) {
return vqnegq_s16(a);
}
-// CHECK-LABEL: @test_vqnegq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
-// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]])
+// CHECK-NEXT: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
int32x4_t test_vqnegq_s32(int32x4_t a) {
return vqnegq_s32(a);
}
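// The vqrdmulh tests below mirror the vqdmulh ones with rounding added,
// roughly (s16 case):
//   r[i] = sat16((2 * (int32_t)a[i] * (int32_t)b[i] + 0x8000) >> 16)
// The expected IR follows the same operand/result bitcast round-trip.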
-// CHECK-LABEL: @test_vqrdmulh_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
return vqrdmulh_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulh_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
return vqrdmulh_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
return vqrdmulhq_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
return vqrdmulhq_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulh_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
return vqrdmulh_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqrdmulh_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
-// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
-// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
return vqrdmulh_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
return vqrdmulhq_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
-// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
return vqrdmulhq_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vqrdmulh_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
-// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
-// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I_I]], <4 x i16> [[VQRDMULH_V1_I_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
return vqrdmulh_n_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulh_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
-// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
-// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
+// CHECK-NEXT: [[VQRDMULH_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I_I]], <2 x i32> [[VQRDMULH_V1_I_I]])
+// CHECK-NEXT: [[VQRDMULH_V3_I_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
return vqrdmulh_n_s32(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_n_s16(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
-// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
-// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
-// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
-// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
-// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3
+// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4
+// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5
+// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6
+// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I_I]], <8 x i16> [[VQRDMULHQ_V1_I_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
return vqrdmulhq_n_s16(a, b);
}
-// CHECK-LABEL: @test_vqrdmulhq_n_s32(
-// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
-// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
-// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
-// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
-// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
-// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i32 0
+// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2
+// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
+// CHECK-NEXT: [[VQRDMULHQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I_I]], <4 x i32> [[VQRDMULHQ_V1_I_I]])
+// CHECK-NEXT: [[VQRDMULHQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
return vqrdmulhq_n_s32(a, b);
}
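// The vqrshl tests below cover the saturating rounding shift: each lane of
// a is shifted by the corresponding signed lane of b (left when positive,
// rounding right when negative), saturating on overflow. As with the other
// <8 x i8> variants, the i8 forms need no bitcast round-trip.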
-// CHECK-LABEL: @test_vqrshl_s8(
-// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQRSHL_V_I]]
+//
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
return vqrshl_s8(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
return vqrshl_s16(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
return vqrshl_s32(a, b);
}
-// CHECK-LABEL: @test_vqrshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
return vqrshl_s64(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u8(
-// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQRSHL_V_I]]
+//
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
return vqrshl_u8(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
return vqrshl_u16(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
return vqrshl_u32(a, b);
}
-// CHECK-LABEL: @test_vqrshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]])
+// CHECK-NEXT: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
return vqrshl_u64(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s8(
-// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQRSHLQ_V_I]]
+//
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
return vqrshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
return vqrshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
return vqrshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
return vqrshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u8(
-// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQRSHLQ_V_I]]
+//
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
return vqrshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
return vqrshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
return vqrshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vqrshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]])
+// CHECK-NEXT: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
return vqrshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vqrshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRN_N1]]
+//
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
return vqrshrn_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqrshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRN_N1]]
+//
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
return vqrshrn_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqrshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRN_N1]]
+//
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
return vqrshrn_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqrshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRN_N1]]
+//
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
return vqrshrn_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vqrshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRN_N1]]
+//
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
return vqrshrn_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vqrshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRN_N1]]
+//
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
return vqrshrn_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vqrshrun_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQRSHRUN_N1]]
+//
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
return vqrshrun_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqrshrun_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQRSHRUN_N1]]
+//
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
return vqrshrun_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqrshrun_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQRSHRUN_N1]]
+//
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
return vqrshrun_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshl_s8(
-// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_V_I]]
+//
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
return vqshl_s8(a, b);
}
-// CHECK-LABEL: @test_vqshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
return vqshl_s16(a, b);
}
-// CHECK-LABEL: @test_vqshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
return vqshl_s32(a, b);
}
-// CHECK-LABEL: @test_vqshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
return vqshl_s64(a, b);
}
-// CHECK-LABEL: @test_vqshl_u8(
-// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_V_I]]
+//
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
return vqshl_u8(a, b);
}
-// CHECK-LABEL: @test_vqshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
return vqshl_u16(a, b);
}
-// CHECK-LABEL: @test_vqshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
return vqshl_u32(a, b);
}
-// CHECK-LABEL: @test_vqshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]])
+// CHECK-NEXT: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
return vqshl_u64(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s8(
-// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSHLQ_V_I]]
+//
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
return vqshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
return vqshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
return vqshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vqshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
return vqshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u8(
-// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSHLQ_V_I]]
+//
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
return vqshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
return vqshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
return vqshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vqshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]])
+// CHECK-NEXT: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
return vqshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vqshlu_n_s8(
-// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1)
-// CHECK: ret <8 x i8> [[VQSHLU_N]]
+// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHLU_N]]
+//
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
return vqshlu_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vqshlu_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 1)
-// CHECK: ret <4 x i16> [[VQSHLU_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHLU_N1]]
+//
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
return vqshlu_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqshlu_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 1))
-// CHECK: ret <2 x i32> [[VQSHLU_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHLU_N1]]
+//
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
return vqshlu_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshlu_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHLU_N1]]
+// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHLU_N1]]
+//
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
return vqshlu_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshluq_n_s8(
-// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VQSHLU_N]]
+// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VQSHLU_N]]
+//
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
return vqshluq_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vqshluq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VQSHLU_N1]]
+// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VQSHLU_N1]]
+//
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
return vqshluq_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqshluq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 1))
-// CHECK: ret <4 x i32> [[VQSHLU_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 1))
+// CHECK-NEXT: ret <4 x i32> [[VQSHLU_N1]]
+//
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
return vqshluq_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshluq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 1))
-// CHECK: ret <2 x i64> [[VQSHLU_N1]]
+// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 1))
+// CHECK-NEXT: ret <2 x i64> [[VQSHLU_N1]]
+//
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
return vqshluq_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_s8(
-// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1))
-// CHECK: ret <8 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_N]]
+//
int8x8_t test_vqshl_n_s8(int8x8_t a) {
return vqshl_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1))
-// CHECK: ret <4 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHL_N1]]
+//
int16x4_t test_vqshl_n_s16(int16x4_t a) {
return vqshl_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1))
-// CHECK: ret <2 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHL_N1]]
+//
int32x2_t test_vqshl_n_s32(int32x2_t a) {
return vqshl_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHL_N1]]
+//
int64x1_t test_vqshl_n_s64(int64x1_t a) {
return vqshl_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_u8(
-// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1))
-// CHECK: ret <8 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHL_N]]
+//
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
return vqshl_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1))
-// CHECK: ret <4 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHL_N1]]
+//
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
return vqshl_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1))
-// CHECK: ret <2 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHL_N1]]
+//
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
return vqshl_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vqshl_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VQSHL_N1]]
+//
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
return vqshl_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_s8(
-// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VQSHL_N]]
+//
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
return vqshlq_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VQSHL_N1]]
+//
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
return vqshlq_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1))
-// CHECK: ret <4 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1))
+// CHECK-NEXT: ret <4 x i32> [[VQSHL_N1]]
+//
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
return vqshlq_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1))
-// CHECK: ret <2 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1))
+// CHECK-NEXT: ret <2 x i64> [[VQSHL_N1]]
+//
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
return vqshlq_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_u8(
-// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VQSHL_N]]
+// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VQSHL_N]]
+//
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
return vqshlq_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VQSHL_N1]]
+// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VQSHL_N1]]
+//
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
return vqshlq_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1))
-// CHECK: ret <4 x i32> [[VQSHL_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1))
+// CHECK-NEXT: ret <4 x i32> [[VQSHL_N1]]
+//
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
return vqshlq_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vqshlq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1))
-// CHECK: ret <2 x i64> [[VQSHL_N1]]
+// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1))
+// CHECK-NEXT: ret <2 x i64> [[VQSHL_N1]]
+//
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
return vqshlq_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vqshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]]
+//
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
return vqshrn_n_s16(a, 1);
}
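A note on the splat (i16 -1) operand in the vqshrn_n checks above: the llvm.arm.neon.vqshiftns family encodes right shifts as negative per-lane shift amounts, so vqshrn_n_s16(a, 1) lowers to a call with a splat of -1. A minimal scalar sketch of one lane's semantics under that reading (the helper name is illustrative only):

  #include <stdint.h>

  /* One lane of vqshrn_n_s16(a, 1): arithmetic right shift by 1,
     then saturate the 16-bit result into the signed 8-bit range. */
  static int8_t vqshrn_lane_s16(int16_t x) {
    int16_t shifted = (int16_t)(x >> 1);
    if (shifted > INT8_MAX) return INT8_MAX;
    if (shifted < INT8_MIN) return INT8_MIN;
    return (int8_t)shifted;
  }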
-// CHECK-LABEL: @test_vqshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]]
+//
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
return vqshrn_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]]
+//
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
return vqshrn_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHRN_N1]]
+//
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
return vqshrn_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vqshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHRN_N1]]
+//
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
return vqshrn_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vqshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHRN_N1]]
+//
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
return vqshrn_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vqshrun_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VQSHRUN_N1]]
+//
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
return vqshrun_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vqshrun_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VQSHRUN_N1]]
+//
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
return vqshrun_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vqshrun_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VQSHRUN_N1]]
+//
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
return vqshrun_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vqsub_s8(
-// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSUB_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSUB_V_I]]
+//
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
return vqsub_s8(a, b);
}
-// CHECK-LABEL: @test_vqsub_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
return vqsub_s16(a, b);
}
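For reference, the vqsub checks above now feed the bitcast-recovered operands into the generic llvm.ssub.sat intrinsics; a minimal scalar sketch of the lane-wise semantics for the i16 case (names are illustrative):

  #include <stdint.h>

  /* One lane of @llvm.ssub.sat.v4i16: widen, subtract, clamp. */
  static int16_t ssub_sat_i16(int16_t a, int16_t b) {
    int32_t r = (int32_t)a - (int32_t)b;
    if (r > INT16_MAX) return INT16_MAX;
    if (r < INT16_MIN) return INT16_MIN;
    return (int16_t)r;
  }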
-// CHECK-LABEL: @test_vqsub_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
return vqsub_s32(a, b);
}
-// CHECK-LABEL: @test_vqsub_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
return vqsub_s64(a, b);
}
-// CHECK-LABEL: @test_vqsub_u8(
-// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VQSUB_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VQSUB_V_I]]
+//
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
return vqsub_u8(a, b);
}
-// CHECK-LABEL: @test_vqsub_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
return vqsub_u16(a, b);
}
-// CHECK-LABEL: @test_vqsub_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
return vqsub_u32(a, b);
}
-// CHECK-LABEL: @test_vqsub_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]])
+// CHECK-NEXT: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
return vqsub_u64(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s8(
-// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSUBQ_V_I]]
+//
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
return vqsubq_s8(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
return vqsubq_s16(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
return vqsubq_s32(a, b);
}
-// CHECK-LABEL: @test_vqsubq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
return vqsubq_s64(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u8(
-// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VQSUBQ_V_I]]
+//
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
return vqsubq_u8(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
return vqsubq_u16(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
return vqsubq_u32(a, b);
}
-// CHECK-LABEL: @test_vqsubq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]])
+// CHECK-NEXT: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
return vqsubq_u64(a, b);
}
-// CHECK-LABEL: @test_vraddhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRADDHN_V2_I]]
+//
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
return vraddhn_s16(a, b);
}
-// CHECK-LABEL: @test_vraddhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
return vraddhn_s32(a, b);
}
-// CHECK-LABEL: @test_vraddhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
return vraddhn_s64(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRADDHN_V2_I]]
+//
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
return vraddhn_u16(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
return vraddhn_u32(a, b);
}
-// CHECK-LABEL: @test_vraddhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]])
+// CHECK-NEXT: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
return vraddhn_u64(a, b);
}
-// CHECK-LABEL: @test_vrecpe_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRECPE_V1_I]]
+// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> [[VRECPE_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VRECPE_V1_I]]
+//
float32x2_t test_vrecpe_f32(float32x2_t a) {
return vrecpe_f32(a);
}
-// CHECK-LABEL: @test_vrecpe_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> [[VRECPE_V_I]])
+// CHECK-NEXT: ret <2 x i32> [[VRECPE_V1_I]]
+//
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
return vrecpe_u32(a);
}
-// CHECK-LABEL: @test_vrecpeq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
+// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> [[VRECPEQ_V_I]])
+// CHECK-NEXT: ret <4 x float> [[VRECPEQ_V1_I]]
+//
float32x4_t test_vrecpeq_f32(float32x4_t a) {
return vrecpeq_f32(a);
}
-// CHECK-LABEL: @test_vrecpeq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]])
+// CHECK-NEXT: ret <4 x i32> [[VRECPEQ_V1_I]]
+//
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
return vrecpeq_u32(a);
}
-// CHECK-LABEL: @test_vrecps_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VRECPS_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vrecps_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]])
+// CHECK-NEXT: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
return vrecps_f32(a, b);
}
-// CHECK-LABEL: @test_vrecpsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
+// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]])
+// CHECK-NEXT: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
return vrecpsq_f32(a, b);
}
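The vrecpe/vrecps pair tested above is normally used together; a small usage sketch, assuming the architectural definition of VRECPS as 2.0 - a*b per lane, which makes it one Newton-Raphson step on the VRECPE estimate:

  #include <arm_neon.h>

  /* Refine the reciprocal estimate: x' = x * (2 - d*x). */
  static float32x2_t recip_refined(float32x2_t d) {
    float32x2_t x = vrecpe_f32(d);        /* initial estimate of 1/d */
    return vmul_f32(x, vrecps_f32(d, x)); /* one refinement step */
  }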
-// CHECK-LABEL: @test_vreinterpret_s8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
return vreinterpret_s8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
return vreinterpret_s8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
return vreinterpret_s8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
return vreinterpret_s8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
return vreinterpret_s8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
return vreinterpret_s8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
return vreinterpret_s8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
return vreinterpret_s8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
return vreinterpret_s8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_p8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
return vreinterpret_s8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
return vreinterpret_s8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
return vreinterpret_s16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
return vreinterpret_s16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
return vreinterpret_s16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
return vreinterpret_s16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
return vreinterpret_s16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
return vreinterpret_s16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
return vreinterpret_s16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
return vreinterpret_s16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
return vreinterpret_s16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
return vreinterpret_s16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s16_p16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
return vreinterpret_s16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
return vreinterpret_s32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
return vreinterpret_s32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
return vreinterpret_s32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
return vreinterpret_s32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
return vreinterpret_s32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u32(
-// CHECK: ret <2 x i32> %a
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i32> [[A]]
+//
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
return vreinterpret_s32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
return vreinterpret_s32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
return vreinterpret_s32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
return vreinterpret_s32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
return vreinterpret_s32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
return vreinterpret_s32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
return vreinterpret_s64_s8(a);
}
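The new <1 x i64> lowering above (scalar i64 bitcast plus insertelement) is what SROA leaves of the parameter copy inside the intrinsic (the __p0.addr temporary, per the value names); at the C level vreinterpret remains a pure bit-pattern copy. A portable sketch of that semantics (helper name is illustrative):

  #include <stdint.h>
  #include <string.h>

  /* Reinterpret eight int8 lanes as one int64: bits are unchanged. */
  static int64_t reinterpret_bits(const int8_t src[8]) {
    int64_t out;
    memcpy(&out, src, sizeof out);
    return out;
  }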
-// CHECK-LABEL: @test_vreinterpret_s64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
return vreinterpret_s64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
return vreinterpret_s64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
return vreinterpret_s64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
return vreinterpret_s64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
return vreinterpret_s64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_u64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
return vreinterpret_s64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
return vreinterpret_s64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
return vreinterpret_s64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
return vreinterpret_s64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_s64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
return vreinterpret_s64_p16(a);
}
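(The <1 x i64> cases above are where the regenerated checks differ most from the old ones: instead of a single vector-to-vector bitcast, the IR now bitcasts the source to a scalar i64 and rebuilds the <1 x i64> with an insertelement. The intrinsic itself remains a pure bit reinterpretation. As a minimal standalone sketch, the memcpy-based helper below reproduces the same bit-preserving semantics in portable C; it is illustrative only, not the arm_neon.h implementation.)

#include <arm_neon.h>
#include <string.h>

/* Illustrative only: reinterpret the 64 bits of an int8x8_t as an
   int64x1_t by copying bytes, which is the behaviour that
   vreinterpret_s64_s8 guarantees (bits preserved, no value conversion). */
static int64x1_t reinterpret_s64_s8_sketch(int8x8_t v) {
  int64x1_t r;
  memcpy(&r, &v, sizeof r);
  return r;
}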
-// CHECK-LABEL: @test_vreinterpret_u8_s8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
return vreinterpret_u8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
return vreinterpret_u8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
return vreinterpret_u8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
return vreinterpret_u8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
return vreinterpret_u8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
return vreinterpret_u8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
return vreinterpret_u8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
return vreinterpret_u8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
return vreinterpret_u8_f32(a);
}
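(For floating-point sources the regenerated IR goes through an intermediate integer vector, here <2 x float> -> <2 x i32> -> <8 x i8>, but the observable result is the same raw-byte view of the input. A small usage sketch follows, assuming a little-endian target; the helper name is made up for illustration.)

#include <arm_neon.h>

/* Illustrative only: view two packed floats as eight raw bytes and read
   lane 0, which on little-endian AArch64 is the least significant byte
   of the first float. */
static uint8_t first_float_low_byte(float32x2_t v) {
  uint8x8_t bytes = vreinterpret_u8_f32(v);
  return vget_lane_u8(bytes, 0);
}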
-// CHECK-LABEL: @test_vreinterpret_u8_p8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
return vreinterpret_u8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
return vreinterpret_u8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
return vreinterpret_u16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
return vreinterpret_u16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
return vreinterpret_u16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
return vreinterpret_u16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
return vreinterpret_u16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
return vreinterpret_u16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
return vreinterpret_u16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
return vreinterpret_u16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
return vreinterpret_u16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
return vreinterpret_u16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u16_p16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
return vreinterpret_u16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
return vreinterpret_u32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
return vreinterpret_u32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s32(
-// CHECK: ret <2 x i32> %a
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i32> [[A]]
+//
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
return vreinterpret_u32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
return vreinterpret_u32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
return vreinterpret_u32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
return vreinterpret_u32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
return vreinterpret_u32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
return vreinterpret_u32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
return vreinterpret_u32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
return vreinterpret_u32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
-// CHECK: ret <2 x i32> [[TMP0]]
+// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
return vreinterpret_u32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
return vreinterpret_u64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
return vreinterpret_u64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
return vreinterpret_u64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_s64(
-// CHECK: ret <1 x i64> %a
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <1 x i64> [[A]]
+//
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
return vreinterpret_u64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
return vreinterpret_u64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
return vreinterpret_u64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
return vreinterpret_u64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
return vreinterpret_u64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
return vreinterpret_u64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
return vreinterpret_u64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_u64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
-// CHECK: ret <1 x i64> [[TMP0]]
+// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to i64
+// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
return vreinterpret_u64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
return vreinterpret_f16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
return vreinterpret_f16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
return vreinterpret_f16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
return vreinterpret_f16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
return vreinterpret_f16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
return vreinterpret_f16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
return vreinterpret_f16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
return vreinterpret_f16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
return vreinterpret_f16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP1]]
+//
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
return vreinterpret_f16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f16_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
-// CHECK: ret <4 x half> [[TMP0]]
+// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP0]]
+//
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
return vreinterpret_f16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
return vreinterpret_f32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
return vreinterpret_f32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
return vreinterpret_f32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
return vreinterpret_f32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
return vreinterpret_f32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
return vreinterpret_f32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
return vreinterpret_f32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP0]]
+//
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
return vreinterpret_f32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
return vreinterpret_f32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
return vreinterpret_f32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpret_f32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
-// CHECK: ret <2 x float> [[TMP0]]
+// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP1]]
+//
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
return vreinterpret_f32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
return vreinterpret_p8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
return vreinterpret_p8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
return vreinterpret_p8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
return vreinterpret_p8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u8(
-// CHECK: ret <8 x i8> %a
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i8> [[A]]
+//
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
return vreinterpret_p8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
return vreinterpret_p8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
return vreinterpret_p8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
return vreinterpret_p8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
return vreinterpret_p8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP1]]
+//
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
return vreinterpret_p8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
return vreinterpret_p8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
return vreinterpret_p16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
return vreinterpret_p16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
return vreinterpret_p16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
return vreinterpret_p16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
return vreinterpret_p16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u16(
-// CHECK: ret <4 x i16> %a
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i16> [[A]]
+//
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
return vreinterpret_p16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
return vreinterpret_p16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
return vreinterpret_p16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(
+// CHECK-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
return vreinterpret_p16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP1]]
+//
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
return vreinterpret_p16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpret_p16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
-// CHECK: ret <4 x i16> [[TMP0]]
+// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[A]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP0]]
+//
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
return vreinterpret_p16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
return vreinterpretq_s8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
return vreinterpretq_s8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
return vreinterpretq_s8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
return vreinterpretq_s8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
return vreinterpretq_s8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
return vreinterpretq_s8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
return vreinterpretq_s8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
return vreinterpretq_s8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
return vreinterpretq_s8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_p8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
return vreinterpretq_s8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
return vreinterpretq_s8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
return vreinterpretq_s16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
return vreinterpretq_s16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
return vreinterpretq_s16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
return vreinterpretq_s16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
return vreinterpretq_s16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
return vreinterpretq_s16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
return vreinterpretq_s16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
return vreinterpretq_s16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
return vreinterpretq_s16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
return vreinterpretq_s16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s16_p16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
return vreinterpretq_s16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
return vreinterpretq_s32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
return vreinterpretq_s32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
return vreinterpretq_s32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
return vreinterpretq_s32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
return vreinterpretq_s32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u32(
-// CHECK: ret <4 x i32> %a
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i32> [[A]]
+//
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
return vreinterpretq_s32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
return vreinterpretq_s32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
return vreinterpretq_s32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
return vreinterpretq_s32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
return vreinterpretq_s32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
return vreinterpretq_s32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
return vreinterpretq_s64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
return vreinterpretq_s64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
return vreinterpretq_s64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
return vreinterpretq_s64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
return vreinterpretq_s64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
return vreinterpretq_s64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_u64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
return vreinterpretq_s64_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
return vreinterpretq_s64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
return vreinterpretq_s64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
return vreinterpretq_s64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_s64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
return vreinterpretq_s64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
return vreinterpretq_u8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
return vreinterpretq_u8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
return vreinterpretq_u8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
return vreinterpretq_u8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
return vreinterpretq_u8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
return vreinterpretq_u8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
return vreinterpretq_u8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
return vreinterpretq_u8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
return vreinterpretq_u8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_p8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
return vreinterpretq_u8_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
return vreinterpretq_u8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
return vreinterpretq_u16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
return vreinterpretq_u16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
return vreinterpretq_u16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
return vreinterpretq_u16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
return vreinterpretq_u16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
return vreinterpretq_u16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
return vreinterpretq_u16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
return vreinterpretq_u16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
return vreinterpretq_u16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
return vreinterpretq_u16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u16_p16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
return vreinterpretq_u16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
return vreinterpretq_u32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
return vreinterpretq_u32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s32(
-// CHECK: ret <4 x i32> %a
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <4 x i32> [[A]]
+//
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
return vreinterpretq_u32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
return vreinterpretq_u32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
return vreinterpretq_u32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
return vreinterpretq_u32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
return vreinterpretq_u32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
return vreinterpretq_u32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
return vreinterpretq_u32_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
return vreinterpretq_u32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-// CHECK: ret <4 x i32> [[TMP0]]
+// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
return vreinterpretq_u32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
return vreinterpretq_u64_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
return vreinterpretq_u64_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
return vreinterpretq_u64_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_s64(
-// CHECK: ret <2 x i64> %a
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <2 x i64> [[A]]
+//
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
return vreinterpretq_u64_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
return vreinterpretq_u64_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
return vreinterpretq_u64_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
return vreinterpretq_u64_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
return vreinterpretq_u64_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
return vreinterpretq_u64_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
return vreinterpretq_u64_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_u64_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
-// CHECK: ret <2 x i64> [[TMP0]]
+// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP0]]
+//
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
return vreinterpretq_u64_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
return vreinterpretq_f16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
return vreinterpretq_f16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
return vreinterpretq_f16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
return vreinterpretq_f16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
return vreinterpretq_f16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
return vreinterpretq_f16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
return vreinterpretq_f16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
return vreinterpretq_f16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
return vreinterpretq_f16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP1]]
+//
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
return vreinterpretq_f16_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f16_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
-// CHECK: ret <8 x half> [[TMP0]]
+// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
+//
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
return vreinterpretq_f16_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
return vreinterpretq_f32_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
return vreinterpretq_f32_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
return vreinterpretq_f32_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
return vreinterpretq_f32_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
return vreinterpretq_f32_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
return vreinterpretq_f32_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
return vreinterpretq_f32_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
return vreinterpretq_f32_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
return vreinterpretq_f32_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
return vreinterpretq_f32_p8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_f32_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
-// CHECK: ret <4 x float> [[TMP0]]
+// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP1]]
+//
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
return vreinterpretq_f32_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
return vreinterpretq_p8_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
return vreinterpretq_p8_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
return vreinterpretq_p8_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
return vreinterpretq_p8_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u8(
-// CHECK: ret <16 x i8> %a
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <16 x i8> [[A]]
+//
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
return vreinterpretq_p8_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
return vreinterpretq_p8_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
return vreinterpretq_p8_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
return vreinterpretq_p8_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
return vreinterpretq_p8_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+//
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
return vreinterpretq_p8_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p8_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
return vreinterpretq_p8_p16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
return vreinterpretq_p16_s8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
return vreinterpretq_p16_s16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
return vreinterpretq_p16_s32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
return vreinterpretq_p16_s64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
return vreinterpretq_p16_u8(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u16(
-// CHECK: ret <8 x i16> %a
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret <8 x i16> [[A]]
+//
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
return vreinterpretq_p16_u16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
return vreinterpretq_p16_u32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
return vreinterpretq_p16_u64(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_f16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
return vreinterpretq_p16_f16(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+//
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
return vreinterpretq_p16_f32(a);
}
-// CHECK-LABEL: @test_vreinterpretq_p16_p8(
-// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-// CHECK: ret <8 x i16> [[TMP0]]
+// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+//
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
return vreinterpretq_p16_p8(a);
}
-// CHECK-LABEL: @test_vrev16_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev16_s8(int8x8_t a) {
return vrev16_s8(a);
}
-// CHECK-LABEL: @test_vrev16_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev16_u8(uint8x8_t a) {
return vrev16_u8(a);
}
-// CHECK-LABEL: @test_vrev16_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev16_p8(poly8x8_t a) {
return vrev16_p8(a);
}
-// CHECK-LABEL: @test_vrev16q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev16q_s8(int8x16_t a) {
return vrev16q_s8(a);
}
-// CHECK-LABEL: @test_vrev16q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
return vrev16q_u8(a);
}
-// CHECK-LABEL: @test_vrev16q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
return vrev16q_p8(a);
}
-// CHECK-LABEL: @test_vrev32_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev32_s8(int8x8_t a) {
return vrev32_s8(a);
}
-// CHECK-LABEL: @test_vrev32_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vrev32_s16(int16x4_t a) {
return vrev32_s16(a);
}
-// CHECK-LABEL: @test_vrev32_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev32_u8(uint8x8_t a) {
return vrev32_u8(a);
}
-// CHECK-LABEL: @test_vrev32_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vrev32_u16(uint16x4_t a) {
return vrev32_u16(a);
}
-// CHECK-LABEL: @test_vrev32_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev32_p8(poly8x8_t a) {
return vrev32_p8(a);
}
-// CHECK-LABEL: @test_vrev32_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vrev32_p16(poly16x4_t a) {
return vrev32_p16(a);
}
-// CHECK-LABEL: @test_vrev32q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev32q_s8(int8x16_t a) {
return vrev32q_s8(a);
}
-// CHECK-LABEL: @test_vrev32q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vrev32q_s16(int16x8_t a) {
return vrev32q_s16(a);
}
-// CHECK-LABEL: @test_vrev32q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
return vrev32q_u8(a);
}
-// CHECK-LABEL: @test_vrev32q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
return vrev32q_u16(a);
}
-// CHECK-LABEL: @test_vrev32q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
return vrev32q_p8(a);
}
-// CHECK-LABEL: @test_vrev32q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
return vrev32q_p16(a);
}
-// CHECK-LABEL: @test_vrev64_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
int8x8_t test_vrev64_s8(int8x8_t a) {
return vrev64_s8(a);
}
-// CHECK-LABEL: @test_vrev64_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
int16x4_t test_vrev64_s16(int16x4_t a) {
return vrev64_s16(a);
}
-// CHECK-LABEL: @test_vrev64_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
int32x2_t test_vrev64_s32(int32x2_t a) {
return vrev64_s32(a);
}
-// CHECK-LABEL: @test_vrev64_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
uint8x8_t test_vrev64_u8(uint8x8_t a) {
return vrev64_u8(a);
}
-// CHECK-LABEL: @test_vrev64_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
uint16x4_t test_vrev64_u16(uint16x4_t a) {
return vrev64_u16(a);
}
-// CHECK-LABEL: @test_vrev64_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i32> [[SHUFFLE_I]]
+//
uint32x2_t test_vrev64_u32(uint32x2_t a) {
return vrev64_u32(a);
}
-// CHECK-LABEL: @test_vrev64_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <8 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]]
+//
poly8x8_t test_vrev64_p8(poly8x8_t a) {
return vrev64_p8(a);
}
-// CHECK-LABEL: @test_vrev64_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-// CHECK: ret <4 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT: ret <4 x i16> [[SHUFFLE_I]]
+//
poly16x4_t test_vrev64_p16(poly16x4_t a) {
return vrev64_p16(a);
}
-// CHECK-LABEL: @test_vrev64_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
-// CHECK: ret <2 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define <2 x float> @test_vrev64_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[A]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x float> [[SHUFFLE_I]]
+//
float32x2_t test_vrev64_f32(float32x2_t a) {
return vrev64_f32(a);
}
-// CHECK-LABEL: @test_vrev64q_s8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
int8x16_t test_vrev64q_s8(int8x16_t a) {
return vrev64q_s8(a);
}
-// CHECK-LABEL: @test_vrev64q_s16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
int16x8_t test_vrev64q_s16(int16x8_t a) {
return vrev64q_s16(a);
}
-// CHECK-LABEL: @test_vrev64q_s32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
int32x4_t test_vrev64q_s32(int32x4_t a) {
return vrev64q_s32(a);
}
-// CHECK-LABEL: @test_vrev64q_u8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
return vrev64q_u8(a);
}
-// CHECK-LABEL: @test_vrev64q_u16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
return vrev64q_u16(a);
}
-// CHECK-LABEL: @test_vrev64q_u32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x i32> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
+//
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
return vrev64q_u32(a);
}
-// CHECK-LABEL: @test_vrev64q_p8(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-// CHECK: ret <16 x i8> [[SHUFFLE_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
+//
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
return vrev64q_p8(a);
}
-// CHECK-LABEL: @test_vrev64q_p16(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-// CHECK: ret <8 x i16> [[SHUFFLE_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
+//
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
return vrev64q_p16(a);
}
-// CHECK-LABEL: @test_vrev64q_f32(
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-// CHECK: ret <4 x float> [[SHUFFLE_I]]
+// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[A]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]]
+//
float32x4_t test_vrev64q_f32(float32x4_t a) {
return vrev64q_f32(a);
}
-// CHECK-LABEL: @test_vrhadd_s8(
-// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRHADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]]
+//
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
return vrhadd_s8(a, b);
}
-// CHECK-LABEL: @test_vrhadd_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
return vrhadd_s16(a, b);
}
-// CHECK-LABEL: @test_vrhadd_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
return vrhadd_s32(a, b);
}
-// CHECK-LABEL: @test_vrhadd_u8(
-// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRHADD_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]]
+//
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
return vrhadd_u8(a, b);
}
-// CHECK-LABEL: @test_vrhadd_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
return vrhadd_u16(a, b);
}
-// CHECK-LABEL: @test_vrhadd_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]])
+// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
return vrhadd_u32(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_s8(
-// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]]
+//
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
return vrhaddq_s8(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
return vrhaddq_s16(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
return vrhaddq_s32(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_u8(
-// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]]
+//
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
return vrhaddq_u8(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
return vrhaddq_u16(a, b);
}
-// CHECK-LABEL: @test_vrhaddq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]])
+// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
return vrhaddq_u32(a, b);
}
-// CHECK-LABEL: @test_vrshl_s8(
-// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRSHL_V_I]]
+//
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
return vrshl_s8(a, b);
}
-// CHECK-LABEL: @test_vrshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
return vrshl_s16(a, b);
}
-// CHECK-LABEL: @test_vrshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
return vrshl_s32(a, b);
}
-// CHECK-LABEL: @test_vrshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
return vrshl_s64(a, b);
}
-// CHECK-LABEL: @test_vrshl_u8(
-// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VRSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VRSHL_V_I]]
+//
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
return vrshl_u8(a, b);
}
-// CHECK-LABEL: @test_vrshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
return vrshl_u16(a, b);
}
-// CHECK-LABEL: @test_vrshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
return vrshl_u32(a, b);
}
-// CHECK-LABEL: @test_vrshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]])
+// CHECK-NEXT: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
return vrshl_u64(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s8(
-// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRSHLQ_V_I]]
+//
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
return vrshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
return vrshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
return vrshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vrshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
return vrshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u8(
-// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VRSHLQ_V_I]]
+//
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
return vrshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
return vrshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
return vrshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vrshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]])
+// CHECK-NEXT: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
return vrshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vrshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VRSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VRSHRN_N1]]
+//
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
return vrshrn_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vrshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VRSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]]
+//
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
return vrshrn_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vrshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VRSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VRSHRN_N1]]
+//
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
return vrshrn_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vrshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i8> [[VRSHRN_N1]]
+// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i8> [[VRSHRN_N1]]
+//
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
return vrshrn_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vrshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i16> [[VRSHRN_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]]
+//
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
return vrshrn_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vrshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i32> [[VRSHRN_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i32> [[VRSHRN_N1]]
+//
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
return vrshrn_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1))
-// CHECK: ret <8 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <8 x i8> [[VRSHR_N]]
+//
int8x8_t test_vrshr_n_s8(int8x8_t a) {
return vrshr_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1))
-// CHECK: ret <4 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <4 x i16> [[VRSHR_N1]]
+//
int16x4_t test_vrshr_n_s16(int16x4_t a) {
return vrshr_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1))
-// CHECK: ret <2 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <2 x i32> [[VRSHR_N1]]
+//
int32x2_t test_vrshr_n_s32(int32x2_t a) {
return vrshr_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VRSHR_N1]]
+//
int64x1_t test_vrshr_n_s64(int64x1_t a) {
return vrshr_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1))
-// CHECK: ret <8 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> [[A]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <8 x i8> [[VRSHR_N]]
+//
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
return vrshr_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1))
-// CHECK: ret <4 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <4 x i16> [[VRSHR_N1]]
+//
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
return vrshr_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1))
-// CHECK: ret <2 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <2 x i32> [[VRSHR_N1]]
+//
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
return vrshr_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vrshr_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VRSHR_N1]]
+//
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
return vrshr_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_s8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1))
-// CHECK: ret <16 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <16 x i8> [[VRSHR_N]]
+//
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
return vrshrq_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i16> [[VRSHR_N1]]
+//
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
return vrshrq_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i32> [[VRSHR_N1]]
+//
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
return vrshrq_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i64> [[VRSHR_N1]]
+//
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
return vrshrq_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_u8(
-// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1))
-// CHECK: ret <16 x i8> [[VRSHR_N]]
+// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> [[A]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <16 x i8> [[VRSHR_N]]
+//
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
return vrshrq_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i16> [[VRSHR_N1]]
+// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i16> [[VRSHR_N1]]
+//
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
return vrshrq_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i32> [[VRSHR_N1]]
+// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i32> [[VRSHR_N1]]
+//
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
return vrshrq_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vrshrq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i64> [[VRSHR_N1]]
+// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i64> [[VRSHR_N1]]
+//
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
return vrshrq_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vrsqrte_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
-// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
+// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]])
+// CHECK-NEXT: ret <2 x float> [[VRSQRTE_V1_I]]
+//
float32x2_t test_vrsqrte_f32(float32x2_t a) {
return vrsqrte_f32(a);
}
-// CHECK-LABEL: @test_vrsqrte_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
-// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]])
+// CHECK-NEXT: ret <2 x i32> [[VRSQRTE_V1_I]]
+//
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
return vrsqrte_u32(a);
}
-// CHECK-LABEL: @test_vrsqrteq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
-// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
+// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]])
+// CHECK-NEXT: ret <4 x float> [[VRSQRTEQ_V1_I]]
+//
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
return vrsqrteq_f32(a);
}
-// CHECK-LABEL: @test_vrsqrteq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
-// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
+// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]])
+// CHECK-NEXT: ret <4 x i32> [[VRSQRTEQ_V1_I]]
+//
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
return vrsqrteq_u32(a);
}
-// CHECK-LABEL: @test_vrsqrts_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
-// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
-// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
+// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]])
+// CHECK-NEXT: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
+// CHECK-NEXT: ret <2 x float> [[TMP5]]
+//
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
return vrsqrts_f32(a, b);
}
-// CHECK-LABEL: @test_vrsqrtsq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
-// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
+// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]])
+// CHECK-NEXT: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
return vrsqrtsq_f32(a, b);
}
-// CHECK-LABEL: @test_vrsra_n_s8(
-// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
-// CHECK: ret <8 x i8> [[VRSRA_N]]
+// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> [[B]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <8 x i8> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i8> [[VRSRA_N]]
+//
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
return vrsra_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
-// CHECK: ret <4 x i16> [[VRSRA_N]]
+// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <4 x i16> [[VRSRA_N]]
+//
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
return vrsra_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
-// CHECK: ret <2 x i32> [[VRSRA_N]]
+// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <2 x i32> [[VRSRA_N]]
+//
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
return vrsra_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
-// CHECK: ret <1 x i64> [[VRSRA_N]]
+// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <1 x i64> [[VRSRA_N]]
+//
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
return vrsra_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_u8(
-// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
-// CHECK: ret <8 x i8> [[VRSRA_N]]
+// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> [[B]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <8 x i8> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <8 x i8> [[VRSRA_N]]
+//
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
return vrsra_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
-// CHECK: ret <4 x i16> [[VRSRA_N]]
+// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <4 x i16> [[VRSRA_N]]
+//
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
return vrsra_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
-// CHECK: ret <2 x i32> [[VRSRA_N]]
+// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <2 x i32> [[VRSRA_N]]
+//
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
return vrsra_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vrsra_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
-// CHECK: ret <1 x i64> [[VRSRA_N]]
+// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <1 x i64> [[VRSRA_N]]
+//
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
return vrsra_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_s8(
-// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
-// CHECK: ret <16 x i8> [[VRSRA_N]]
+// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> [[B]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <16 x i8> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <16 x i8> [[VRSRA_N]]
+//
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
return vrsraq_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
-// CHECK: ret <8 x i16> [[VRSRA_N]]
+// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <8 x i16> [[VRSRA_N]]
+//
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
return vrsraq_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
-// CHECK: ret <4 x i32> [[VRSRA_N]]
+// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <4 x i32> [[VRSRA_N]]
+//
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
return vrsraq_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-// CHECK: ret <2 x i64> [[VRSRA_N]]
+// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <2 x i64> [[VRSRA_N]]
+//
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
return vrsraq_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_u8(
-// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
-// CHECK: ret <16 x i8> [[VRSRA_N]]
+// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> [[B]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <16 x i8> [[A]], [[TMP0]]
+// CHECK-NEXT: ret <16 x i8> [[VRSRA_N]]
+//
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
return vrsraq_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
-// CHECK: ret <8 x i16> [[VRSRA_N]]
+// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <8 x i16> [[VRSRA_N]]
+//
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
return vrsraq_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
-// CHECK: ret <4 x i32> [[VRSRA_N]]
+// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <4 x i32> [[VRSRA_N]]
+//
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
return vrsraq_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vrsraq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1))
-// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-// CHECK: ret <2 x i64> [[VRSRA_N]]
+// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+// CHECK-NEXT: ret <2 x i64> [[VRSRA_N]]
+//
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
return vrsraq_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vrsubhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRSUBHN_V2_I]]
+//
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
return vrsubhn_s16(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
return vrsubhn_s32(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
return vrsubhn_s64(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
-// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: ret <8 x i8> [[VRSUBHN_V2_I]]
+//
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
return vrsubhn_u16(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
return vrsubhn_u32(a, b);
}
-// CHECK-LABEL: @test_vrsubhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]])
+// CHECK-NEXT: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
return vrsubhn_u64(a, b);
}
-// CHECK-LABEL: @test_vset_lane_u8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
return vset_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vset_lane_u16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[B]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
return vset_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vset_lane_u32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VSET_LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[B]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VSET_LANE]]
+//
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
return vset_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vset_lane_s8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
return vset_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vset_lane_s16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[B]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
return vset_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vset_lane_s32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
-// CHECK: ret <2 x i32> [[VSET_LANE]]
+// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[B]], i32 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i32> [[VSET_LANE]]
+//
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
return vset_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vset_lane_p8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
-// CHECK: ret <8 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]]
+//
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
return vset_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vset_lane_p16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
-// CHECK: ret <4 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[B]], i16 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i16> [[VSET_LANE]]
+//
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
return vset_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vset_lane_f32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
-// CHECK: ret <2 x float> [[VSET_LANE]]
+// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x float> [[B]], float [[A]], i32 1
+// CHECK-NEXT: ret <2 x float> [[VSET_LANE]]
+//
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
return vset_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vset_lane_f16(
-// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
-// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
-// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: store half [[TMP0]], ptr [[__REINT_246]], align 2
-// CHECK: store <4 x half> %b, ptr [[__REINT1_246]], align 8
-// CHECK: [[TMP2:%.*]] = load i16, ptr [[__REINT_246]], align 2
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT1_246]], align 8
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
-// CHECK: store <4 x i16> [[VSET_LANE]], ptr [[__REINT2_246]], align 8
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[__REINT2_246]], align 8
-// CHECK: ret <4 x half> [[TMP8]]
+// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[TMP2]], i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[VSET_LANE]] to <4 x half>
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
return vset_lane_f16(*a, b, 1);
}
-// CHECK-LABEL: @test_vsetq_lane_u8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(
+// CHECK-SAME: i8 noundef zeroext [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
return vsetq_lane_u8(a, b, 15);
}
-// CHECK-LABEL: @test_vsetq_lane_u16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(
+// CHECK-SAME: i16 noundef zeroext [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[B]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
return vsetq_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vsetq_lane_u32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[B]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VSET_LANE]]
+//
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
return vsetq_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vsetq_lane_s8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
return vsetq_lane_s8(a, b, 15);
}
-// CHECK-LABEL: @test_vsetq_lane_s16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[B]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
return vsetq_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vsetq_lane_s32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
-// CHECK: ret <4 x i32> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(
+// CHECK-SAME: i32 noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[B]], i32 [[A]], i32 3
+// CHECK-NEXT: ret <4 x i32> [[VSET_LANE]]
+//
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
return vsetq_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vsetq_lane_p8(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
-// CHECK: ret <16 x i8> [[VSET_LANE]]
+// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(
+// CHECK-SAME: i8 noundef signext [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[A]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]]
+//
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
return vsetq_lane_p8(a, b, 15);
}
-// CHECK-LABEL: @test_vsetq_lane_p16(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
-// CHECK: ret <8 x i16> [[VSET_LANE]]
+// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(
+// CHECK-SAME: i16 noundef signext [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[B]], i16 [[A]], i32 7
+// CHECK-NEXT: ret <8 x i16> [[VSET_LANE]]
+//
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
return vsetq_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vsetq_lane_f32(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
-// CHECK: ret <4 x float> [[VSET_LANE]]
+// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(
+// CHECK-SAME: float noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x float> [[B]], float [[A]], i32 3
+// CHECK-NEXT: ret <4 x float> [[VSET_LANE]]
+//
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
return vsetq_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vsetq_lane_f16(
-// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
-// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
-// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
-// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
-// CHECK: store half [[TMP0]], ptr [[__REINT_248]], align 2
-// CHECK: store <8 x half> %b, ptr [[__REINT1_248]], align 16
-// CHECK: [[TMP2:%.*]] = load i16, ptr [[__REINT_248]], align 2
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT1_248]], align 16
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
-// CHECK: store <8 x i16> [[VSET_LANE]], ptr [[__REINT2_248]], align 16
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[__REINT2_248]], align 16
-// CHECK: ret <8 x half> [[TMP8]]
+// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP2]], i32 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[VSET_LANE]] to <8 x half>
+// CHECK-NEXT: ret <8 x half> [[TMP3]]
+//
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
return vsetq_lane_f16(*a, b, 3);
}
-// CHECK-LABEL: @test_vset_lane_s64(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
-// CHECK: ret <1 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[B]], i64 [[A]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
return vset_lane_s64(a, b, 0);
}
-// CHECK-LABEL: @test_vset_lane_u64(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
-// CHECK: ret <1 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[B]], i64 [[A]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[VSET_LANE]]
+//
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
return vset_lane_u64(a, b, 0);
}
-// CHECK-LABEL: @test_vsetq_lane_s64(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[B]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
return vsetq_lane_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsetq_lane_u64(
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
-// CHECK: ret <2 x i64> [[VSET_LANE]]
+// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[B]], i64 [[A]], i32 1
+// CHECK-NEXT: ret <2 x i64> [[VSET_LANE]]
+//
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
return vsetq_lane_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vshl_s8(
-// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vshl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VSHL_V_I]]
+//
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
return vshl_s8(a, b);
}
-// CHECK-LABEL: @test_vshl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vshl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
return vshl_s16(a, b);
}
-// CHECK-LABEL: @test_vshl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vshl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
return vshl_s32(a, b);
}
-// CHECK-LABEL: @test_vshl_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vshl_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
return vshl_s64(a, b);
}
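// vshl takes a per-lane signed shift count from the low byte of each lane of
// b: non-negative counts shift left, negative counts shift right (arithmetic
// for the _s forms, logical for _u). A scalar sketch for the s64 case, under
// the assumption that |count| stays below the lane width (the hardware
// clamps out-of-range counts; plain C shifts do not):
//
//   int64_t vshl_s64_ref(int64_t a, int64_t b) {
//     int8_t n = (int8_t)b;                  // count = low byte of b, signed
//     return n >= 0 ? a << n : a >> -n;      // arithmetic right shift
//   }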
-// CHECK-LABEL: @test_vshl_u8(
-// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VSHL_V_I]]
+// CHECK-LABEL: define <8 x i8> @test_vshl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VSHL_V_I]]
+//
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
return vshl_u8(a, b);
}
-// CHECK-LABEL: @test_vshl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <4 x i16> [[VSHL_V2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vshl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[TMP2]]
+//
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
return vshl_u16(a, b);
}
-// CHECK-LABEL: @test_vshl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <2 x i32> [[VSHL_V2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vshl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[TMP2]]
+//
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
return vshl_u32(a, b);
}
-// CHECK-LABEL: @test_vshl_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
-// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
-// CHECK: ret <1 x i64> [[VSHL_V2_I]]
+// CHECK-LABEL: define <1 x i64> @test_vshl_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]])
+// CHECK-NEXT: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to i64
+// CHECK-NEXT: [[REF_TMP_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
+// CHECK-NEXT: ret <1 x i64> [[REF_TMP_I_SROA_0_0_VEC_INSERT]]
+//
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
return vshl_u64(a, b);
}
-// CHECK-LABEL: @test_vshlq_s8(
-// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VSHLQ_V_I]]
+//
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
return vshlq_s8(a, b);
}
-// CHECK-LABEL: @test_vshlq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
return vshlq_s16(a, b);
}
-// CHECK-LABEL: @test_vshlq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
return vshlq_s32(a, b);
}
-// CHECK-LABEL: @test_vshlq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
return vshlq_s64(a, b);
}
-// CHECK-LABEL: @test_vshlq_u8(
-// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
-// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
+// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// CHECK-NEXT: ret <16 x i8> [[VSHLQ_V_I]]
+//
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
return vshlq_u8(a, b);
}
-// CHECK-LABEL: @test_vshlq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
return vshlq_u16(a, b);
}
-// CHECK-LABEL: @test_vshlq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
return vshlq_u32(a, b);
}
-// CHECK-LABEL: @test_vshlq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
-// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
-// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
+// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]])
+// CHECK-NEXT: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
+// CHECK-NEXT: ret <2 x i64> [[TMP2]]
+//
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
return vshlq_u64(a, b);
}
-// CHECK-LABEL: @test_vshll_n_s8(
-// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
int16x8_t test_vshll_n_s8(int8x8_t a) {
return vshll_n_s8(a, 1);
}
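// vshll_n widens before shifting, exactly as the sext+shl pair above shows:
// each 8-bit lane becomes 16 bits, then is shifted left by the immediate.
// Per-lane scalar sketch, assuming n is within the range the intrinsic
// accepts:
//
//   int16_t vshll_n_s8_lane_ref(int8_t a, int n) {
//     return (int16_t)((int16_t)a << n);     // sext to i16, then shl
//   }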
-// CHECK-LABEL: @test_vshll_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
int32x4_t test_vshll_n_s16(int16x4_t a) {
return vshll_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vshll_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
int64x2_t test_vshll_n_s32(int32x2_t a) {
return vshll_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshll_n_u8(
-// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHLL_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]]
+//
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
return vshll_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vshll_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHLL_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]]
+//
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
return vshll_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshll_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHLL_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]]
+//
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
return vshll_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_s8(
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1)
-// CHECK: ret <8 x i8> [[VSHL_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
+//
int8x8_t test_vshl_n_s8(int8x8_t a) {
return vshl_n_s8(a, 1);
}
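// vshl_n is a plain per-lane shift by an immediate; the bitcast pairs through
// <8 x i8> in the wider-lane checks are value-preserving round-trips that
// later passes fold away. Per-lane scalar sketch:
//
//   int8_t vshl_n_s8_lane_ref(int8_t a) { return (int8_t)(a << 1); }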
-// CHECK-LABEL: @test_vshl_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <4 x i16> [[VSHL_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
+//
int16x4_t test_vshl_n_s16(int16x4_t a) {
return vshl_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <2 x i32> [[VSHL_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
+//
int32x2_t test_vshl_n_s32(int32x2_t a) {
return vshl_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHL_N]]
+// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
+//
int64x1_t test_vshl_n_s64(int64x1_t a) {
return vshl_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_u8(
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1)
-// CHECK: ret <8 x i8> [[VSHL_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <8 x i8> [[VSHL_N]]
+//
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
return vshl_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <4 x i16> [[VSHL_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <4 x i16> [[VSHL_N]]
+//
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
return vshl_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <2 x i32> [[VSHL_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VSHL_N]]
+//
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
return vshl_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshl_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHL_N]]
+// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHL_N]]
+//
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
return vshl_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_s8(
-// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1)
-// CHECK: ret <16 x i8> [[VSHL_N]]
+// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
+//
int8x16_t test_vshlq_n_s8(int8x16_t a) {
return vshlq_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHL_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
+//
int16x8_t test_vshlq_n_s16(int16x8_t a) {
return vshlq_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHL_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
+//
int32x4_t test_vshlq_n_s32(int32x4_t a) {
return vshlq_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHL_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
+//
int64x2_t test_vshlq_n_s64(int64x2_t a) {
return vshlq_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_u8(
-// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1)
-// CHECK: ret <16 x i8> [[VSHL_N]]
+// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
+//
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
return vshlq_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHL_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
+//
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
return vshlq_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHL_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
+//
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
return vshlq_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshlq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHL_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHL_N]]
+//
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
return vshlq_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vshrn_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSHRN_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSHRN_N]]
+//
int8x8_t test_vshrn_n_s16(int16x8_t a) {
return vshrn_n_s16(a, 1);
}
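// vshrn_n shifts right, then narrows to half-width lanes, matching the
// ashr+trunc pair above. Per-lane scalar sketch:
//
//   int8_t vshrn_n_s16_lane_ref(int16_t a) { return (int8_t)(a >> 1); }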
-// CHECK-LABEL: @test_vshrn_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSHRN_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSHRN_N]]
+//
int16x4_t test_vshrn_n_s32(int32x4_t a) {
return vshrn_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshrn_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSHRN_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSHRN_N]]
+//
int32x2_t test_vshrn_n_s64(int64x2_t a) {
return vshrn_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshrn_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSHRN_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSHRN_N]]
+//
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
return vshrn_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshrn_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSHRN_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSHRN_N]]
+//
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
return vshrn_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshrn_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSHRN_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSHRN_N]]
+//
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
return vshrn_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_s8(
-// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 1)
-// CHECK: ret <8 x i8> [[VSHR_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <8 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <8 x i8> [[VSHR_N]]
+//
int8x8_t test_vshr_n_s8(int8x8_t a) {
return vshr_n_s8(a, 1);
}
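// The only difference between the signed and unsigned vshr_n forms below is
// the shift flavour: ashr (sign-propagating) for _s versus lshr for _u.
// Per-lane scalar sketch:
//
//   int8_t  vshr_n_s8_lane_ref(int8_t a)  { return (int8_t)(a >> 1);  }
//   uint8_t vshr_n_u8_lane_ref(uint8_t a) { return (uint8_t)(a >> 1); }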
-// CHECK-LABEL: @test_vshr_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <4 x i16> [[VSHR_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <4 x i16> [[VSHR_N]]
+//
int16x4_t test_vshr_n_s16(int16x4_t a) {
return vshr_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <2 x i32> [[VSHR_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VSHR_N]]
+//
int32x2_t test_vshr_n_s32(int32x2_t a) {
return vshr_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHR_N]]
+// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHR_N]]
+//
int64x1_t test_vshr_n_s64(int64x1_t a) {
return vshr_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_u8(
-// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 1)
-// CHECK: ret <8 x i8> [[VSHR_N]]
+// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <8 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <8 x i8> [[VSHR_N]]
+//
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
return vshr_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <4 x i16> [[VSHR_N]]
+// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <4 x i16> [[VSHR_N]]
+//
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
return vshr_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <2 x i32> [[VSHR_N]]
+// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <2 x i32> [[VSHR_N]]
+//
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
return vshr_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshr_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <1 x i64> [[VSHR_N]]
+// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <1 x i64> [[VSHR_N]]
+//
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
return vshr_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_s8(
-// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 1)
-// CHECK: ret <16 x i8> [[VSHR_N]]
+// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <16 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <16 x i8> [[VSHR_N]]
+//
int8x16_t test_vshrq_n_s8(int8x16_t a) {
return vshrq_n_s8(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHR_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHR_N]]
+//
int16x8_t test_vshrq_n_s16(int16x8_t a) {
return vshrq_n_s16(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHR_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHR_N]]
+//
int32x4_t test_vshrq_n_s32(int32x4_t a) {
return vshrq_n_s32(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHR_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHR_N]]
+//
int64x2_t test_vshrq_n_s64(int64x2_t a) {
return vshrq_n_s64(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_u8(
-// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 1)
-// CHECK: ret <16 x i8> [[VSHR_N]]
+// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <16 x i8> [[A]], splat (i8 1)
+// CHECK-NEXT: ret <16 x i8> [[VSHR_N]]
+//
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
return vshrq_n_u8(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
-// CHECK: ret <8 x i16> [[VSHR_N]]
+// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
+// CHECK-NEXT: ret <8 x i16> [[VSHR_N]]
+//
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
return vshrq_n_u16(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
-// CHECK: ret <4 x i32> [[VSHR_N]]
+// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
+// CHECK-NEXT: ret <4 x i32> [[VSHR_N]]
+//
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
return vshrq_n_u32(a, 1);
}
-// CHECK-LABEL: @test_vshrq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
-// CHECK: ret <2 x i64> [[VSHR_N]]
+// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
+// CHECK-NEXT: ret <2 x i64> [[VSHR_N]]
+//
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
return vshrq_n_u64(a, 1);
}
-// CHECK-LABEL: @test_vsli_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
return vsli_n_s8(a, b, 1);
}
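// vsli_n is a shift-left-and-insert: b is shifted left by the immediate and
// merged into a, whose low n bits per lane are preserved (hence the splat
// third operand to the vshiftins intrinsic above). Per-lane scalar sketch
// for n == 1:
//
//   int8_t vsli_n_s8_lane_ref(int8_t a, int8_t b) {
//     return (int8_t)(((uint8_t)b << 1) | ((uint8_t)a & 0x01u));
//   }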
-// CHECK-LABEL: @test_vsli_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
return vsli_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1))
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1))
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
return vsli_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
return vsli_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
return vsli_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
return vsli_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1))
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1))
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
return vsli_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1))
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1))
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
return vsli_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
return vsli_n_p8(a, b, 1);
}
-// CHECK-LABEL: @test_vsli_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
return vsli_n_p16(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
return vsliq_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
return vsliq_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1))
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1))
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
return vsliq_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1))
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1))
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
return vsliq_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsliq_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsliq_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1))
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1))
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsliq_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1))
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1))
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsliq_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
return vsliq_n_p8(a, b, 1);
}
-// CHECK-LABEL: @test_vsliq_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
return vsliq_n_p16(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_s8(
-// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 1)
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <8 x i8> [[B]], splat (i8 1)
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
return vsra_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 1)
-// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i16> [[TMP4]]
+// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
return vsra_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 1)
-// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i32> [[TMP4]]
+// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
return vsra_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <1 x i64> [[TMP4]]
+// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <1 x i64> [[TMP4]]
+//
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
return vsra_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_u8(
-// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 1)
-// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
-// CHECK: ret <8 x i8> [[TMP0]]
+// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <8 x i8> [[B]], splat (i8 1)
+// CHECK-NEXT: [[TMP0:%.*]] = add <8 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
return vsra_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 1)
-// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i16> [[TMP4]]
+// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i16> [[TMP4]]
+//
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
return vsra_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 1)
-// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i32> [[TMP4]]
+// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i32> [[TMP4]]
+//
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
return vsra_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsra_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <1 x i64> [[TMP4]]
+// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <1 x i64> [[TMP4]]
+//
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
return vsra_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_s8(
-// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 1)
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <16 x i8> [[B]], splat (i8 1)
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
return vsraq_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 1)
-// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <8 x i16> [[TMP4]]
+// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
return vsraq_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 1)
-// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i32> [[TMP4]]
+// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
return vsraq_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i64> [[TMP4]]
+// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
+//
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
return vsraq_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_u8(
-// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 1)
-// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
-// CHECK: ret <16 x i8> [[TMP0]]
+// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <16 x i8> [[B]], splat (i8 1)
+// CHECK-NEXT: [[TMP0:%.*]] = add <16 x i8> [[A]], [[VSRA_N]]
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsraq_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 1)
-// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <8 x i16> [[TMP4]]
+// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+//
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsraq_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 1)
-// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <4 x i32> [[TMP4]]
+// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+//
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsraq_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsraq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 1)
-// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
-// CHECK: ret <2 x i64> [[TMP4]]
+// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 1)
+// CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
+// CHECK-NEXT: ret <2 x i64> [[TMP4]]
+//
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsraq_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
return vsri_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
return vsri_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1))
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
return vsri_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
return vsri_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
return vsri_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
return vsri_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1))
-// CHECK: ret <2 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <2 x i32> [[VSLI_N2]]
+//
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
return vsri_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1))
-// CHECK: ret <1 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <1 x i64> [[VSLI_N2]]
+//
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
return vsri_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1))
-// CHECK: ret <8 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <8 x i8> [[VSLI_N]]
+//
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
return vsri_n_p8(a, b, 1);
}
-// CHECK-LABEL: @test_vsri_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
-// CHECK: ret <4 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <4 x i16> [[VSLI_N2]]
+//
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
return vsri_n_p16(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_s8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
return vsriq_n_s8(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
return vsriq_n_s16(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
return vsriq_n_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
return vsriq_n_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_u8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
return vsriq_n_u8(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
return vsriq_n_u16(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1))
-// CHECK: ret <4 x i32> [[VSLI_N2]]
+// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1))
+// CHECK-NEXT: ret <4 x i32> [[VSLI_N2]]
+//
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
return vsriq_n_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1))
-// CHECK: ret <2 x i64> [[VSLI_N2]]
+// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1))
+// CHECK-NEXT: ret <2 x i64> [[VSLI_N2]]
+//
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
return vsriq_n_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_p8(
-// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1))
-// CHECK: ret <16 x i8> [[VSLI_N]]
+// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> splat (i8 -1))
+// CHECK-NEXT: ret <16 x i8> [[VSLI_N]]
+//
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
return vsriq_n_p8(a, b, 1);
}
-// CHECK-LABEL: @test_vsriq_n_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
-// CHECK: ret <8 x i16> [[VSLI_N2]]
+// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1))
+// CHECK-NEXT: ret <8 x i16> [[VSLI_N2]]
+//
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
return vsriq_n_p16(a, b, 1);
}
-// CHECK-LABEL: @test_vst1q_u8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v16i8(ptr [[A]], <16 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
vst1q_u8(a, b);
}
-// CHECK-LABEL: @test_vst1q_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i16(ptr [[A]], <8 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
vst1q_u16(a, b);
}
-// CHECK-LABEL: @test_vst1q_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4i32(ptr %a, <4 x i32> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4i32(ptr [[A]], <4 x i32> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
vst1q_u32(a, b);
}
-// CHECK-LABEL: @test_vst1q_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v2i64(ptr %a, <2 x i64> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v2i64(ptr [[A]], <2 x i64> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
vst1q_u64(a, b);
}
-// CHECK-LABEL: @test_vst1q_s8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v16i8(ptr [[A]], <16 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1q_s8(int8_t * a, int8x16_t b) {
vst1q_s8(a, b);
}
-// CHECK-LABEL: @test_vst1q_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i16(ptr [[A]], <8 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1q_s16(int16_t * a, int16x8_t b) {
vst1q_s16(a, b);
}
-// CHECK-LABEL: @test_vst1q_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4i32(ptr %a, <4 x i32> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4i32(ptr [[A]], <4 x i32> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_s32(int32_t * a, int32x4_t b) {
vst1q_s32(a, b);
}
-// CHECK-LABEL: @test_vst1q_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v2i64(ptr %a, <2 x i64> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v2i64(ptr [[A]], <2 x i64> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_s64(int64_t * a, int64x2_t b) {
vst1q_s64(a, b);
}
-// CHECK-LABEL: @test_vst1q_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8f16(ptr %a, <8 x half> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8f16(ptr [[A]], <8 x half> [[TMP2]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1q_f16(float16_t * a, float16x8_t b) {
vst1q_f16(a, b);
}
-// CHECK-LABEL: @test_vst1q_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4f32(ptr %a, <4 x float> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4f32(ptr [[A]], <4 x float> [[TMP2]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_f32(float32_t * a, float32x4_t b) {
vst1q_f32(a, b);
}
-// CHECK-LABEL: @test_vst1q_p8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v16i8(ptr [[A]], <16 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
vst1q_p8(a, b);
}
-// CHECK-LABEL: @test_vst1q_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i16(ptr [[A]], <8 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
vst1q_p16(a, b);
}
-// CHECK-LABEL: @test_vst1_u8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i8(ptr [[A]], <8 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
vst1_u8(a, b);
}
-// CHECK-LABEL: @test_vst1_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4i16(ptr [[A]], <4 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
vst1_u16(a, b);
}
-// CHECK-LABEL: @test_vst1_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v2i32(ptr %a, <2 x i32> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v2i32(ptr [[A]], <2 x i32> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
vst1_u32(a, b);
}
-// CHECK-LABEL: @test_vst1_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v1i64(ptr [[A]], <1 x i64> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
vst1_u64(a, b);
}
-// CHECK-LABEL: @test_vst1_s8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i8(ptr [[A]], <8 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1_s8(int8_t * a, int8x8_t b) {
vst1_s8(a, b);
}
-// CHECK-LABEL: @test_vst1_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4i16(ptr [[A]], <4 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1_s16(int16_t * a, int16x4_t b) {
vst1_s16(a, b);
}
-// CHECK-LABEL: @test_vst1_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v2i32(ptr %a, <2 x i32> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v2i32(ptr [[A]], <2 x i32> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1_s32(int32_t * a, int32x2_t b) {
vst1_s32(a, b);
}
-// CHECK-LABEL: @test_vst1_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v1i64(ptr [[A]], <1 x i64> [[TMP1]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1_s64(int64_t * a, int64x1_t b) {
vst1_s64(a, b);
}
-// CHECK-LABEL: @test_vst1_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4f16(ptr %a, <4 x half> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4f16(ptr [[A]], <4 x half> [[TMP2]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1_f16(float16_t * a, float16x4_t b) {
vst1_f16(a, b);
}
-// CHECK-LABEL: @test_vst1_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v2f32(ptr %a, <2 x float> [[TMP2]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v2f32(ptr [[A]], <2 x float> [[TMP2]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1_f32(float32_t * a, float32x2_t b) {
vst1_f32(a, b);
}
-// CHECK-LABEL: @test_vst1_p8(
-// CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v8i8(ptr [[A]], <8 x i8> [[B]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
vst1_p8(a, b);
}
-// CHECK-LABEL: @test_vst1_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v4i16(ptr [[A]], <4 x i16> [[TMP1]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
vst1_p16(a, b);
}
-// CHECK-LABEL: @test_vst1q_lane_u8(
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
vst1q_lane_u8(a, b, 15);
}
-// CHECK-LABEL: @test_vst1q_lane_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
vst1q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vst1q_lane_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK: store i32 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
vst1q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vst1q_lane_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP3]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> <i32 1>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v1i64(ptr [[A]], <1 x i64> [[TMP2]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
vst1q_lane_u64(a, b, 1);
}
-// CHECK-LABEL: @test_vst1q_lane_s8(
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
vst1q_lane_s8(a, b, 15);
}
-// CHECK-LABEL: @test_vst1q_lane_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
vst1q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vst1q_lane_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-// CHECK: store i32 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
vst1q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vst1q_lane_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
-// CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP3]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> <i32 1>
+// CHECK-NEXT: call void @llvm.arm.neon.vst1.p0.v1i64(ptr [[A]], <1 x i64> [[TMP2]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
vst1q_lane_s64(a, b, 1);
}
-// CHECK-LABEL: @test_vst1q_lane_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
-// CHECK: store half [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[B]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
+// CHECK-NEXT: store half [[TMP3]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
vst1q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vst1q_lane_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
-// CHECK: store float [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
+// CHECK-NEXT: store float [[TMP3]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
vst1q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vst1q_lane_p8(
-// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x i8> [[B]], i32 15
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
vst1q_lane_p8(a, b, 15);
}
-// CHECK-LABEL: @test_vst1q_lane_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
vst1q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vst1_lane_u8(
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
vst1_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vst1_lane_u16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
vst1_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vst1_lane_u32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK: store i32 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
vst1_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vst1_lane_u64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
-// CHECK: store i64 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
vst1_lane_u64(a, b, 0);
}
-// CHECK-LABEL: @test_vst1_lane_s8(
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
vst1_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vst1_lane_s16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
vst1_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vst1_lane_s32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-// CHECK: store i32 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
vst1_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vst1_lane_s64(
-// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
-// CHECK: store i64 [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK-NEXT: store i64 [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
vst1_lane_s64(a, b, 0);
}
-// CHECK-LABEL: @test_vst1_lane_f16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
-// CHECK: store half [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[B]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
+// CHECK-NEXT: store half [[TMP3]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
vst1_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vst1_lane_f32(
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-// CHECK: store float [[TMP3]], ptr %a, align 4
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+// CHECK-NEXT: store float [[TMP3]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
vst1_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vst1_lane_p8(
-// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
-// CHECK: store i8 [[TMP0]], ptr %a, align 1
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i8> [[B]], i32 7
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A]], align 1
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
vst1_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vst1_lane_p16(
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-// CHECK: store i16 [[TMP3]], ptr %a, align 2
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst1_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[A]], align 2
+// CHECK-NEXT: ret void
+//
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
vst1_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vst2q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
vst2q_u8(a, b);
}
-// CHECK-LABEL: @test_vst2q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
vst2q_u16(a, b);
}
-// CHECK-LABEL: @test_vst2q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
vst2q_u32(a, b);
}
-// CHECK-LABEL: @test_vst2q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
vst2q_s8(a, b);
}
-// CHECK-LABEL: @test_vst2q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
vst2q_s16(a, b);
}
-// CHECK-LABEL: @test_vst2q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
vst2q_s32(a, b);
}
-// CHECK-LABEL: @test_vst2q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8f16(ptr %a, <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8f16(ptr [[A]], <8 x half> [[TMP2]], <8 x half> [[TMP3]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
vst2q_f16(a, b);
}
-// CHECK-LABEL: @test_vst2q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4f32(ptr %a, <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4f32(ptr [[A]], <4 x float> [[TMP2]], <4 x float> [[TMP3]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
vst2q_f32(a, b);
}
-// CHECK-LABEL: @test_vst2q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
vst2q_p8(a, b);
}
-// CHECK-LABEL: @test_vst2q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
vst2q_p16(a, b);
}
-// CHECK-LABEL: @test_vst2_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
vst2_u8(a, b);
}
-// CHECK-LABEL: @test_vst2_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
vst2_u16(a, b);
}
-// CHECK-LABEL: @test_vst2_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
vst2_u32(a, b);
}
-// CHECK-LABEL: @test_vst2_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v1i64(ptr %a, <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v1i64(ptr [[A]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
vst2_u64(a, b);
}
-// CHECK-LABEL: @test_vst2_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_s8(int8_t * a, int8x8x2_t b) {
vst2_s8(a, b);
}
-// CHECK-LABEL: @test_vst2_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_s16(int16_t * a, int16x4x2_t b) {
vst2_s16(a, b);
}
-// CHECK-LABEL: @test_vst2_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_s32(int32_t * a, int32x2x2_t b) {
vst2_s32(a, b);
}
-// CHECK-LABEL: @test_vst2_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v1i64(ptr %a, <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v1i64(ptr [[A]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_s64(int64_t * a, int64x1x2_t b) {
vst2_s64(a, b);
}
-// CHECK-LABEL: @test_vst2_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4f16(ptr %a, <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4f16(ptr [[A]], <4 x half> [[TMP4]], <4 x half> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_f16(float16_t * a, float16x4x2_t b) {
vst2_f16(a, b);
}
-// CHECK-LABEL: @test_vst2_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v2f32(ptr %a, <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v2f32(ptr [[A]], <2 x float> [[TMP4]], <2 x float> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
vst2_f32(a, b);
}
-// CHECK-LABEL: @test_vst2_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
vst2_p8(a, b);
}
-// CHECK-LABEL: @test_vst2_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
vst2_p16(a, b);
}
-// CHECK-LABEL: @test_vst2q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
vst2q_lane_u16(a, b, 7);
}
-// CHECK-LABEL: @test_vst2q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
vst2q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vst2q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
vst2q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vst2q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
vst2q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vst2q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr %a, <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr [[A]], <8 x half> [[TMP2]], <8 x half> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
vst2q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vst2q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4f32(ptr %a, <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4f32(ptr [[A]], <4 x float> [[TMP2]], <4 x float> [[TMP3]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
vst2q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vst2q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
vst2q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vst2_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
vst2_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vst2_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
vst2_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vst2_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
vst2_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vst2_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
vst2_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vst2_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
vst2_lane_s16(a, b, 3);
}
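// For the 16- and 32-bit element cases the new checks still carry a bitcast
// to <8 x i8> and immediately back ([[TMP2]] through [[TMP5]] above). That
// round-trip appears to come from the builtin lowering casting operands
// through the generic byte-vector type; only the re-typed values reach the
// @llvm.arm.neon.vst2lane call, and the i8 cases (s8/p8) skip it entirely.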
-// CHECK-LABEL: @test_vst2_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
vst2_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vst2_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr %a, <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr [[A]], <4 x half> [[TMP4]], <4 x half> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
vst2_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vst2_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v2f32(ptr %a, <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v2f32(ptr [[A]], <2 x float> [[TMP4]], <2 x float> [[TMP5]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
vst2_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vst2_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
vst2_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vst2_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst2_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
vst2_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vst3q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
vst3q_u8(a, b);
}
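// The q-register triples coerce to [6 x i64], so the regenerated checks first
// rebuild each 128-bit value from two i64 halves (the [[B_SROA_*_VEC_INSERT]]
// insertelement chains above) before bitcasting to <16 x i8>. The same
// skeleton repeats for every vst3q_* test that follows; only the final
// element-type bitcasts and the intrinsic signature differ.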
-// CHECK-LABEL: @test_vst3q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
vst3q_u16(a, b);
}
-// CHECK-LABEL: @test_vst3q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
vst3q_u32(a, b);
}
-// CHECK-LABEL: @test_vst3q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
vst3q_s8(a, b);
}
-// CHECK-LABEL: @test_vst3q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
vst3q_s16(a, b);
}
-// CHECK-LABEL: @test_vst3q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
vst3q_s32(a, b);
}
-// CHECK-LABEL: @test_vst3q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8f16(ptr %a, <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8f16(ptr [[A]], <8 x half> [[TMP3]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
vst3q_f16(a, b);
}
-// CHECK-LABEL: @test_vst3q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4f32(ptr %a, <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4f32(ptr [[A]], <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
vst3q_f32(a, b);
}
-// CHECK-LABEL: @test_vst3q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
vst3q_p8(a, b);
}
-// CHECK-LABEL: @test_vst3q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
vst3q_p16(a, b);
}
-// CHECK-LABEL: @test_vst3_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
vst3_u8(a, b);
}
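// The d-register triples coerce to [3 x i64]; as with the two-register cases
// earlier, each i64 is bitcast straight to the 64-bit vector type, so no
// insertelement reassembly is needed before the @llvm.arm.neon.vst3 call.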
-// CHECK-LABEL: @test_vst3_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
vst3_u16(a, b);
}
-// CHECK-LABEL: @test_vst3_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
vst3_u32(a, b);
}
-// CHECK-LABEL: @test_vst3_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v1i64(ptr %a, <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v1i64(ptr [[A]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
vst3_u64(a, b);
}
-// CHECK-LABEL: @test_vst3_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
vst3_s8(a, b);
}
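For 8-bit element types the coerced i64 values bitcast straight to <8 x i8>, which is already the operand type of @llvm.arm.neon.vst3.p0.v8i8, so the extra bitcast round-trip seen for wider elements does not appear. A sketch under the same assumptions (helper name is hypothetical):

    #include <arm_neon.h>

    void store_three_s8(int8_t *dst) {
      int8x8_t v = vdup_n_s8(-1);
      int8x8x3_t triple = {{v, v, v}};
      vst3_s8(dst, triple); /* the trailing i32 1 in the checks is the alignment operand */
    }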
-// CHECK-LABEL: @test_vst3_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
vst3_s16(a, b);
}
-// CHECK-LABEL: @test_vst3_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
vst3_s32(a, b);
}
-// CHECK-LABEL: @test_vst3_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v1i64(ptr %a, <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v1i64(ptr [[A]], <1 x i64> [[TMP6]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
vst3_s64(a, b);
}
-// CHECK-LABEL: @test_vst3_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4f16(ptr %a, <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4f16(ptr [[A]], <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
vst3_f16(a, b);
}
-// CHECK-LABEL: @test_vst3_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v2f32(ptr %a, <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v2f32(ptr [[A]], <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
vst3_f32(a, b);
}
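The floating-point d-register variants follow the same shape; only the element type of the bitcasts changes (<4 x half>, <2 x float>). Illustrative usage, with argument names invented for the example:

    #include <arm_neon.h>

    void store_three_f32(float32_t *dst, float32x2_t v0, float32x2_t v1,
                         float32x2_t v2) {
      float32x2x3_t triple = {{v0, v1, v2}};
      vst3_f32(dst, triple); /* lowers to @llvm.arm.neon.vst3.p0.v2f32 */
    }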
-// CHECK-LABEL: @test_vst3_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
vst3_p8(a, b);
}
-// CHECK-LABEL: @test_vst3_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
vst3_p16(a, b);
}
-// CHECK-LABEL: @test_vst3q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
vst3q_lane_u16(a, b, 7);
}
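The q-register lane forms differ because each 128-bit vector spans two slots of the [6 x i64] coercion: the checks rebuild every vector with a pair of insertelement operations into a <2 x i64> before bitcasting to the element type. A minimal sketch of a caller (names illustrative):

    #include <arm_neon.h>

    void store_lane7_u16(uint16_t *dst, uint16x8_t v0, uint16x8_t v1,
                         uint16x8_t v2) {
      uint16x8x3_t triple = {{v0, v1, v2}};
      vst3q_lane_u16(dst, triple, 7); /* lane index surfaces as the i32 7 immediate */
    }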
-// CHECK-LABEL: @test_vst3q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
vst3q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vst3q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
vst3q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vst3q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
vst3q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vst3q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr %a, <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr [[A]], <8 x half> [[TMP3]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
vst3q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vst3q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4f32(ptr %a, <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4f32(ptr [[A]], <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
vst3q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vst3q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [6 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [6 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
vst3q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vst3_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
vst3_lane_u8(a, b, 7);
}
-// CHECK-LABEL: @test_vst3_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
vst3_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vst3_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
vst3_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vst3_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
vst3_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vst3_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
vst3_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vst3_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
vst3_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vst3_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr %a, <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr [[A]], <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x half> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
vst3_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vst3_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2f32(ptr %a, <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v2f32(ptr [[A]], <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x float> [[TMP8]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
vst3_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vst3_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
vst3_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vst3_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst3_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
vst3_lane_p16(a, b, 3);
}
-// CHECK-LABEL: @test_vst4q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
vst4q_u8(a, b);
}
-// CHECK-LABEL: @test_vst4q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
vst4q_u16(a, b);
}
-// CHECK-LABEL: @test_vst4q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
vst4q_u32(a, b);
}
-// CHECK-LABEL: @test_vst4q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
vst4q_s8(a, b);
}
-// CHECK-LABEL: @test_vst4q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
vst4q_s16(a, b);
}
-// CHECK-LABEL: @test_vst4q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
vst4q_s32(a, b);
}
-// CHECK-LABEL: @test_vst4q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8f16(ptr %a, <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8f16(ptr [[A]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x half> [[TMP6]], <8 x half> [[TMP7]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
vst4q_f16(a, b);
}
-// CHECK-LABEL: @test_vst4q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4f32(ptr %a, <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4f32(ptr [[A]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
vst4q_f32(a, b);
}
-// CHECK-LABEL: @test_vst4q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
-// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v16i8(ptr [[A]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
vst4q_p8(a, b);
}
-// CHECK-LABEL: @test_vst4q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
vst4q_p16(a, b);
}
-// CHECK-LABEL: @test_vst4_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
vst4_u8(a, b);
}
-// CHECK-LABEL: @test_vst4_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
vst4_u16(a, b);
}
-// CHECK-LABEL: @test_vst4_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
vst4_u32(a, b);
}
-// CHECK-LABEL: @test_vst4_u64(
-// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v1i64(ptr %a, <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_u64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v1i64(ptr [[A]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
vst4_u64(a, b);
}
-// CHECK-LABEL: @test_vst4_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_s8(int8_t * a, int8x8x4_t b) {
vst4_s8(a, b);
}
-// CHECK-LABEL: @test_vst4_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_s16(int16_t * a, int16x4x4_t b) {
vst4_s16(a, b);
}
-// CHECK-LABEL: @test_vst4_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_s32(int32_t * a, int32x2x4_t b) {
vst4_s32(a, b);
}
-// CHECK-LABEL: @test_vst4_s64(
-// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v1i64(ptr %a, <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_s64(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <1 x i64>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v1i64(ptr [[A]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_s64(int64_t * a, int64x1x4_t b) {
vst4_s64(a, b);
}
-// CHECK-LABEL: @test_vst4_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4f16(ptr %a, <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4f16(ptr [[A]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_f16(float16_t * a, float16x4x4_t b) {
vst4_f16(a, b);
}
-// CHECK-LABEL: @test_vst4_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v2f32(ptr %a, <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v2f32(ptr [[A]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
vst4_f32(a, b);
}
-// CHECK-LABEL: @test_vst4_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
vst4_p8(a, b);
}
-// CHECK-LABEL: @test_vst4_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
vst4_p16(a, b);
}
-// CHECK-LABEL: @test_vst4q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
vst4q_lane_u16(a, b, 7);
}
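
(For the q-register variants the coerced argument is [8 x i64], so the new checks rebuild each 128-bit register as a <2 x i64> via a pair of insertelement instructions -- the SROA'd form -- before bitcasting to the element type. Semantically, the lane intrinsic stores one element from each of the four vectors; a minimal sketch with hypothetical names, assuming <arm_neon.h>:

    #include <arm_neon.h>

    /* Writes v.val[0][7], v.val[1][7], v.val[2][7], v.val[3][7] to four
       consecutive uint16_t at dst; the lane index must be a compile-time
       constant in [0,7] for the 8-lane q form. */
    void sketch_vst4q_lane_u16(uint16_t *dst, uint16x8x4_t v) {
      vst4q_lane_u16(dst, v, 7);
    }
)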
-// CHECK-LABEL: @test_vst4q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
vst4q_lane_u32(a, b, 3);
}
-// CHECK-LABEL: @test_vst4q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
vst4q_lane_s16(a, b, 7);
}
-// CHECK-LABEL: @test_vst4q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr [[A]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
vst4q_lane_s32(a, b, 3);
}
-// CHECK-LABEL: @test_vst4q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr %a, <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr [[A]], <8 x half> [[TMP4]], <8 x half> [[TMP5]], <8 x half> [[TMP6]], <8 x half> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
vst4q_lane_f16(a, b, 7);
}
-// CHECK-LABEL: @test_vst4q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4f32(ptr %a, <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4f32(ptr [[A]], <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]], i32 3, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
vst4q_lane_f32(a, b, 3);
}
-// CHECK-LABEL: @test_vst4q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
-// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4q_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [8 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_0_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_0_0_VEC_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_SROA_3_16_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_2_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[B_SROA_3_24_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_3_16_VEC_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_4_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 4
+// CHECK-NEXT: [[B_SROA_6_32_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_4_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_5_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 5
+// CHECK-NEXT: [[B_SROA_6_40_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_6_32_VEC_INSERT]], i64 [[B_COERCE_FCA_5_EXTRACT]], i32 1
+// CHECK-NEXT: [[B_COERCE_FCA_6_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 6
+// CHECK-NEXT: [[B_SROA_9_48_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[B_COERCE_FCA_6_EXTRACT]], i32 0
+// CHECK-NEXT: [[B_COERCE_FCA_7_EXTRACT:%.*]] = extractvalue [8 x i64] [[B_COERCE]], 7
+// CHECK-NEXT: [[B_SROA_9_56_VEC_INSERT:%.*]] = insertelement <2 x i64> [[B_SROA_9_48_VEC_INSERT]], i64 [[B_COERCE_FCA_7_EXTRACT]], i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B_SROA_0_8_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B_SROA_3_24_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[B_SROA_6_40_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[B_SROA_9_56_VEC_INSERT]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr [[A]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[TMP6]], <8 x i16> [[TMP7]], i32 7, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
vst4q_lane_p16(a, b, 7);
}
-// CHECK-LABEL: @test_vst4_lane_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_u8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
vst4_lane_u8(a, b, 7);
}
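
(The 8-bit variants are the shortest hunks: the operands are already <8 x i8>, so only the i64-to-vector bitcasts remain and the vector-to-vector round trip seen in the 16- and 32-bit checks disappears. Source-level sketch, hypothetical names, assuming <arm_neon.h>:

    #include <arm_neon.h>

    /* Writes v.val[0][7] .. v.val[3][7] to four consecutive bytes at dst;
       lane index must be a compile-time constant in [0,7]. */
    void sketch_vst4_lane_u8(uint8_t *dst, uint8x8x4_t v) {
      vst4_lane_u8(dst, v, 7);
    }
)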
-// CHECK-LABEL: @test_vst4_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_u16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
vst4_lane_u16(a, b, 3);
}
-// CHECK-LABEL: @test_vst4_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_u32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
vst4_lane_u32(a, b, 1);
}
-// CHECK-LABEL: @test_vst4_lane_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_s8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
vst4_lane_s8(a, b, 7);
}
-// CHECK-LABEL: @test_vst4_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_s16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
vst4_lane_s16(a, b, 3);
}
-// CHECK-LABEL: @test_vst4_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_s32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr [[A]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
vst4_lane_s32(a, b, 1);
}
-// CHECK-LABEL: @test_vst4_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr %a, <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_f16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x half>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr [[A]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
vst4_lane_f16(a, b, 3);
}
-// CHECK-LABEL: @test_vst4_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v2f32(ptr %a, <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_f32(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <2 x float>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x float> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v2f32(ptr [[A]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i32 1, i32 4)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
vst4_lane_f32(a, b, 1);
}
-// CHECK-LABEL: @test_vst4_lane_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_p8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 7, i32 1)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
vst4_lane_p8(a, b, 7);
}
-// CHECK-LABEL: @test_vst4_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
-// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
-// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
-// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
-// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
-// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
-// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
-// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vst4_lane_p16(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[B_COERCE_FCA_0_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE_FCA_1_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE_FCA_2_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[B_COERCE_FCA_3_EXTRACT]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
+// CHECK-NEXT: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr [[A]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i32 3, i32 2)
+// CHECK-NEXT: ret void
+//
void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
vst4_lane_p16(a, b, 3);
}
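All of the vst4_lane tests above now follow the same shape: each i64 word of the coerced [4 x i64] is bitcast directly to the element vector, in place of the old alloca-plus-memcpy expansion. For reference, a minimal caller looks like this (store_lane3 is a hypothetical name; assumes only <arm_neon.h>):

#include <arm_neon.h>

// Hypothetical example: stores lane 3 of each of the four vectors to dst.
// The lane index must be a compile-time constant in [0, 3] for 4 x i16.
void store_lane3(int16_t *dst, int16x4x4_t quad) {
  vst4_lane_s16(dst, quad, 3);
}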
-// CHECK-LABEL: @test_vsub_s8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define <8 x i8> @test_vsub_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
return vsub_s8(a, b);
}
-// CHECK-LABEL: @test_vsub_s16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vsub_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
return vsub_s16(a, b);
}
-// CHECK-LABEL: @test_vsub_s32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vsub_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
return vsub_s32(a, b);
}
-// CHECK-LABEL: @test_vsub_s64(
-// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[SUB_I]]
+// CHECK-LABEL: define <1 x i64> @test_vsub_s64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[SUB_I]]
+//
int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
return vsub_s64(a, b);
}
-// CHECK-LABEL: @test_vsub_f32(
-// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b
-// CHECK: ret <2 x float> [[SUB_I]]
+// CHECK-LABEL: define <2 x float> @test_vsub_f32(
+// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x float> [[SUB_I]]
+//
float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
return vsub_f32(a, b);
}
-// CHECK-LABEL: @test_vsub_u8(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
-// CHECK: ret <8 x i8> [[SUB_I]]
+// CHECK-LABEL: define <8 x i8> @test_vsub_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i8> [[SUB_I]]
+//
uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
return vsub_u8(a, b);
}
-// CHECK-LABEL: @test_vsub_u16(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
-// CHECK: ret <4 x i16> [[SUB_I]]
+// CHECK-LABEL: define <4 x i16> @test_vsub_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i16> [[SUB_I]]
+//
uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
return vsub_u16(a, b);
}
-// CHECK-LABEL: @test_vsub_u32(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
-// CHECK: ret <2 x i32> [[SUB_I]]
+// CHECK-LABEL: define <2 x i32> @test_vsub_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i32> [[SUB_I]]
+//
uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
return vsub_u32(a, b);
}
-// CHECK-LABEL: @test_vsub_u64(
-// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
-// CHECK: ret <1 x i64> [[SUB_I]]
+// CHECK-LABEL: define <1 x i64> @test_vsub_u64(
+// CHECK-SAME: <1 x i64> noundef [[A:%.*]], <1 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <1 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <1 x i64> [[SUB_I]]
+//
uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
return vsub_u64(a, b);
}
-// CHECK-LABEL: @test_vsubq_s8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
return vsubq_s8(a, b);
}
-// CHECK-LABEL: @test_vsubq_s16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
return vsubq_s16(a, b);
}
-// CHECK-LABEL: @test_vsubq_s32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
return vsubq_s32(a, b);
}
-// CHECK-LABEL: @test_vsubq_s64(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
return vsubq_s64(a, b);
}
-// CHECK-LABEL: @test_vsubq_f32(
-// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b
-// CHECK: ret <4 x float> [[SUB_I]]
+// CHECK-LABEL: define <4 x float> @test_vsubq_f32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x float> [[SUB_I]]
+//
float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
return vsubq_f32(a, b);
}
-// CHECK-LABEL: @test_vsubq_u8(
-// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
-// CHECK: ret <16 x i8> [[SUB_I]]
+// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: ret <16 x i8> [[SUB_I]]
+//
uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
return vsubq_u8(a, b);
}
-// CHECK-LABEL: @test_vsubq_u16(
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[B]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
return vsubq_u16(a, b);
}
-// CHECK-LABEL: @test_vsubq_u32(
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[B]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
return vsubq_u32(a, b);
}
-// CHECK-LABEL: @test_vsubq_u64(
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[B]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
return vsubq_u64(a, b);
}
-// CHECK-LABEL: @test_vsubhn_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSUBHN2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSUBHN2_I]]
+//
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
return vsubhn_s16(a, b);
}
-// CHECK-LABEL: @test_vsubhn_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSUBHN2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSUBHN2_I]]
+//
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
return vsubhn_s32(a, b);
}
-// CHECK-LABEL: @test_vsubhn_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSUBHN2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSUBHN2_I]]
+//
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
return vsubhn_s64(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VSUBHN2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VSUBHN2_I]]
+//
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
return vsubhn_u16(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VSUBHN2_I]]
+// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VSUBHN2_I]]
+//
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
return vsubhn_u32(a, b);
}
-// CHECK-LABEL: @test_vsubhn_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
-// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
-// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VSUBHN2_I]]
+// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
+// CHECK-NEXT: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VSUBHN2_I]]
+//
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
return vsubhn_u64(a, b);
}
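The vsubhn CHECK lines assert a three-step lowering: subtract, logical shift right by half the lane width, truncate. A minimal scalar sketch of one 16-to-8-bit lane (subhn16 is a made-up helper name):

#include <stdint.h>

// One vsubhn_s16 lane: high half of the wrapped 16-bit difference.
static inline int8_t subhn16(int16_t a, int16_t b) {
  return (int8_t)(((uint16_t)(a - b)) >> 8);  // sub, lshr 8, trunc
}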
-// CHECK-LABEL: @test_vsubl_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
return vsubl_s8(a, b);
}
-// CHECK-LABEL: @test_vsubl_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
return vsubl_s16(a, b);
}
-// CHECK-LABEL: @test_vsubl_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
return vsubl_s32(a, b);
}
-// CHECK-LABEL: @test_vsubl_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
return vsubl_u8(a, b);
}
-// CHECK-LABEL: @test_vsubl_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
return vsubl_u16(a, b);
}
-// CHECK-LABEL: @test_vsubl_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
return vsubl_u32(a, b);
}
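vsubl widens both operands first (sext for the signed variants, zext for the unsigned ones) and subtracts at full width, so the difference cannot wrap in the narrow type. A scalar sketch of one signed 8-bit lane (subl8 is a made-up helper name):

#include <stdint.h>

// One vsubl_s8 lane: sign-extend both inputs to 16 bits, then subtract.
static inline int16_t subl8(int8_t a, int8_t b) {
  return (int16_t)a - (int16_t)b;
}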
-// CHECK-LABEL: @test_vsubw_s8(
-// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
return vsubw_s8(a, b);
}
-// CHECK-LABEL: @test_vsubw_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
return vsubw_s16(a, b);
}
-// CHECK-LABEL: @test_vsubw_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
return vsubw_s32(a, b);
}
-// CHECK-LABEL: @test_vsubw_u8(
-// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
-// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
-// CHECK: ret <8 x i16> [[SUB_I]]
+// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <8 x i16> [[SUB_I]]
+//
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
return vsubw_u8(a, b);
}
-// CHECK-LABEL: @test_vsubw_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
-// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
-// CHECK: ret <4 x i32> [[SUB_I]]
+// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
+//
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
return vsubw_u16(a, b);
}
-// CHECK-LABEL: @test_vsubw_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
-// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
-// CHECK: ret <2 x i64> [[SUB_I]]
+// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]]
+// CHECK-NEXT: ret <2 x i64> [[SUB_I]]
+//
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
return vsubw_u32(a, b);
}
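vsubw differs from vsubl only in that the first operand is already wide, so just the second operand is extended before the subtract. One signed lane as a scalar sketch (subw8 is a made-up helper name):

#include <stdint.h>

// One vsubw_s8 lane: only b is sign-extended; the sub wraps at 16 bits.
static inline int16_t subw8(int16_t a, int8_t b) {
  return (int16_t)(a - (int16_t)b);
}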
-// CHECK-LABEL: @test_vtbl1_u8(
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
+//
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
return vtbl1_u8(a, b);
}
-// CHECK-LABEL: @test_vtbl1_s8(
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
+//
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
return vtbl1_s8(a, b);
}
-// CHECK-LABEL: @test_vtbl1_p8(
-// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[A]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
+//
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
return vtbl1_p8(a, b);
}
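vtbl1 is a byte-table lookup into a single 8-byte vector; indices outside [0, 7] produce 0. A scalar model of one lane (tbl1_lane is a made-up helper name):

#include <stdint.h>

// One vtbl1 lane: in-range index selects a table byte, out-of-range gives 0.
static inline uint8_t tbl1_lane(const uint8_t table[8], uint8_t index) {
  return index < 8 ? table[index] : 0;
}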
-// CHECK-LABEL: @test_vtbl2_u8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl2_u8(
+// CHECK-SAME: [2 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
+//
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
return vtbl2_u8(a, b);
}
-// CHECK-LABEL: @test_vtbl2_s8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl2_s8(
+// CHECK-SAME: [2 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
+//
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
return vtbl2_s8(a, b);
}
-// CHECK-LABEL: @test_vtbl2_p8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl2_p8(
+// CHECK-SAME: [2 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
+//
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
return vtbl2_p8(a, b);
}
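vtbl2 treats the pair as one 16-entry table; the new lowering rebuilds the two table halves from the coerced [2 x i64] with extractvalue/insertvalue plus i64-to-vector bitcasts instead of round-tripping through an alloca. A scalar model of one lane (tbl2_lane is a made-up helper name):

#include <stdint.h>

// One vtbl2 lane: the two 8-byte vectors form a 16-entry table; out of
// range indices yield 0.
static inline uint8_t tbl2_lane(const uint8_t t0[8], const uint8_t t1[8],
                                uint8_t index) {
  if (index < 8)  return t0[index];
  if (index < 16) return t1[index - 8];
  return 0;
}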
-// CHECK-LABEL: @test_vtbl3_u8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl3_u8(
+// CHECK-SAME: [3 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
+//
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
return vtbl3_u8(a, b);
}
-// CHECK-LABEL: @test_vtbl3_s8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl3_s8(
+// CHECK-SAME: [3 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
+//
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
return vtbl3_s8(a, b);
}
-// CHECK-LABEL: @test_vtbl3_p8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl3_p8(
+// CHECK-SAME: [3 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
+//
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
return vtbl3_p8(a, b);
}
-// CHECK-LABEL: @test_vtbl4_u8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl4_u8(
+// CHECK-SAME: [4 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[A_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
+//
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
return vtbl4_u8(a, b);
}
-// CHECK-LABEL: @test_vtbl4_s8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl4_s8(
+// CHECK-SAME: [4 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[A_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
+//
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
return vtbl4_s8(a, b);
}
-// CHECK-LABEL: @test_vtbl4_p8(
-// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[A]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
-// CHECK: ret <8 x i8> [[VTBL4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbl4_p8(
+// CHECK-SAME: [4 x i64] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 0
+// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 1
+// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 2
+// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[A_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[A_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[A_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[A_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[A_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[B]])
+// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
+//
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
return vtbl4_p8(a, b);
}
-// CHECK-LABEL: @test_vtbx1_u8(
-// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
+//
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vtbx1_u8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx1_s8(
-// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
+//
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
return vtbx1_s8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx1_p8(
-// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX1_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> [[A]], <8 x i8> [[B]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
+//
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
return vtbx1_p8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx2_u8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
+//
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
return vtbx2_u8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx2_s8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
+//
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
return vtbx2_s8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx2_p8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX2_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [2 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[DOTFCA_1_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_1_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
+//
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
return vtbx2_p8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx3_u8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
+//
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
return vtbx3_u8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx3_s8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
+//
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
return vtbx3_s8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx3_p8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX3_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [3 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i64] [[DOTFCA_2_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_2_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
+//
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
return vtbx3_p8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx4_u8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
+//
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
return vtbx4_u8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx4_s8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
+//
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
return vtbx4_s8(a, b, c);
}
-// CHECK-LABEL: @test_vtbx4_p8(
-// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
-// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
-// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
-// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
-// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
-// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
-// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
-// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
-// CHECK: ret <8 x i8> [[VTBX4_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], [4 x i64] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[B_COERCE]], 3
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i64] poison, i64 [[B_COERCE_FCA_0_EXTRACT]], 0
+// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_0_INSERT]], i64 [[B_COERCE_FCA_1_EXTRACT]], 1
+// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_1_INSERT]], i64 [[B_COERCE_FCA_2_EXTRACT]], 2
+// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i64] [[DOTFCA_2_INSERT]], i64 [[B_COERCE_FCA_3_EXTRACT]], 3
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 0
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_0_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_1_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_2_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i64] [[DOTFCA_3_INSERT]], 3
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[DOTFCA_3_INSERT_FCA_3_EXTRACT]] to <8 x i8>
+// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> [[A]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[C]])
+// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
+//
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
return vtbx4_p8(a, b, c);
}
-// CHECK: @test_vtrn_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META3]]
+// CHECK-NEXT: ret void
+//
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
return vtrn_s8(a, b);
}
-// CHECK: @test_vtrn_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META6]]
+// CHECK-NEXT: ret void
+//
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
return vtrn_s16(a, b);
}
-// CHECK: @test_vtrn_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META9]]
+// CHECK-NEXT: ret void
+//
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
return vtrn_s32(a, b);
}
-// CHECK: @test_vtrn_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META12]]
+// CHECK-NEXT: ret void
+//
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
return vtrn_u8(a, b);
}
-// CHECK: @test_vtrn_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META15]]
+// CHECK-NEXT: ret void
+//
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
return vtrn_u16(a, b);
}
-// CHECK: @test_vtrn_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META18]]
+// CHECK-NEXT: ret void
+//
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
return vtrn_u32(a, b);
}
-// CHECK: @test_vtrn_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META21]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x float> [[VTRN1_I]], ptr [[TMP6]], align 4, !alias.scope [[META21]]
+// CHECK-NEXT: ret void
+//
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
return vtrn_f32(a, b);
}
-// CHECK: @test_vtrn_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META24]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META24]]
+// CHECK-NEXT: ret void
+//
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
return vtrn_p8(a, b);
}
-// CHECK: @test_vtrn_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrn_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META27]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META27]]
+// CHECK-NEXT: ret void
+//
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
return vtrn_p16(a, b);
}
-// CHECK: @test_vtrnq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META30]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META30]]
+// CHECK-NEXT: ret void
+//
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
return vtrnq_s8(a, b);
}
-// CHECK: @test_vtrnq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META33]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META33]]
+// CHECK-NEXT: ret void
+//
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
return vtrnq_s16(a, b);
}
-// CHECK: @test_vtrnq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META36]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META36]]
+// CHECK-NEXT: ret void
+//
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
return vtrnq_s32(a, b);
}
-// CHECK: @test_vtrnq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META39]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META39]]
+// CHECK-NEXT: ret void
+//
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
return vtrnq_u8(a, b);
}
-// CHECK: @test_vtrnq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META42]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META42]]
+// CHECK-NEXT: ret void
+//
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
return vtrnq_u16(a, b);
}
-// CHECK: @test_vtrnq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META45]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META45]]
+// CHECK-NEXT: ret void
+//
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
return vtrnq_u32(a, b);
}
-// CHECK: @test_vtrnq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK: store <4 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK: store <4 x float> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT: store <4 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META48]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT: store <4 x float> [[VTRN1_I]], ptr [[TMP6]], align 4, !alias.scope [[META48]]
+// CHECK-NEXT: ret void
+//
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
return vtrnq_f32(a, b);
}
-// CHECK: @test_vtrnq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]])
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META51]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VTRN1_I]], ptr [[TMP0]], align 4, !alias.scope [[META51]]
+// CHECK-NEXT: ret void
+//
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
return vtrnq_p8(a, b);
}
-// CHECK: @test_vtrnq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vtrnq_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META54]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope [[META54]]
+// CHECK-NEXT: ret void
+//
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
return vtrnq_p16(a, b);
}
-// CHECK-LABEL: @test_vtst_s8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtst_s8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
return vtst_s8(a, b);
}
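
All of the vtst checks below share the same three-instruction lowering: AND the two inputs, compare each lane against zero, then sign-extend the <N x i1> result so a matching lane becomes all ones. A hypothetical standalone illustration of that semantics (not part of the patch):

    #include <arm_neon.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      int8x8_t a = {1, 2, 4, 8, 16, 32, 64, 0};
      int8x8_t b = {1, 0, 4, 0, 16, 0, 64, 0};
      /* lane i = (a[i] & b[i]) != 0 ? 0xFF : 0x00,
         i.e. the and + icmp ne + sext sequence in the checks. */
      uint8x8_t m = vtst_s8(a, b);
      uint8_t out[8];
      vst1_u8(out, m);
      for (int i = 0; i < 8; i++)
        printf("%02x ", out[i]);   /* prints: ff 00 ff 00 ff 00 ff 00 */
      printf("\n");
      return 0;
    }
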
-// CHECK-LABEL: @test_vtst_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define <4 x i16> @test_vtst_s16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
return vtst_s16(a, b);
}
-// CHECK-LABEL: @test_vtst_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VTST_I]]
+// CHECK-LABEL: define <2 x i32> @test_vtst_s32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VTST_I]]
+//
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
return vtst_s32(a, b);
}
-// CHECK-LABEL: @test_vtst_u8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtst_u8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
return vtst_u8(a, b);
}
-// CHECK-LABEL: @test_vtst_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define <4 x i16> @test_vtst_u16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
return vtst_u16(a, b);
}
-// CHECK-LABEL: @test_vtst_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK: ret <2 x i32> [[VTST_I]]
+// CHECK-LABEL: define <2 x i32> @test_vtst_u32(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
+// CHECK-NEXT: ret <2 x i32> [[VTST_I]]
+//
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
return vtst_u32(a, b);
}
-// CHECK-LABEL: @test_vtst_p8(
-// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
-// CHECK: ret <8 x i8> [[VTST_I]]
+// CHECK-LABEL: define <8 x i8> @test_vtst_p8(
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: ret <8 x i8> [[VTST_I]]
+//
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
return vtst_p8(a, b);
}
-// CHECK-LABEL: @test_vtst_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
-// CHECK: ret <4 x i16> [[VTST_I]]
+// CHECK-LABEL: define <4 x i16> @test_vtst_p16(
+// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
+// CHECK-NEXT: ret <4 x i16> [[VTST_I]]
+//
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
return vtst_p16(a, b);
}
-// CHECK-LABEL: @test_vtstq_s8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
return vtstq_s8(a, b);
}
-// CHECK-LABEL: @test_vtstq_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
return vtstq_s16(a, b);
}
-// CHECK-LABEL: @test_vtstq_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VTST_I]]
+// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VTST_I]]
+//
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
return vtstq_s32(a, b);
}
-// CHECK-LABEL: @test_vtstq_u8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
return vtstq_u8(a, b);
}
-// CHECK-LABEL: @test_vtstq_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
return vtstq_u16(a, b);
}
-// CHECK-LABEL: @test_vtstq_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK: ret <4 x i32> [[VTST_I]]
+// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[VTST_I]]
+//
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
return vtstq_u32(a, b);
}
-// CHECK-LABEL: @test_vtstq_p8(
-// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
-// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
-// CHECK: ret <16 x i8> [[VTST_I]]
+// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(
+// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = and <16 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: ret <16 x i8> [[VTST_I]]
+//
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
return vtstq_p8(a, b);
}
-// CHECK-LABEL: @test_vtstq_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
-// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
-// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
-// CHECK: ret <8 x i16> [[VTST_I]]
+// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(
+// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
+// CHECK-NEXT: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[VTST_I]]
+//
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
return vtstq_p16(a, b);
}
-// CHECK: @test_vuzp_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META57]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META57]]
+// CHECK-NEXT: ret void
+//
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
return vuzp_s8(a, b);
}
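
As with vtrn, the vuzp checks pair two shuffles, here with masks <0,2,4,6,...> and <1,3,5,7,...>: val[0] collects the even-indexed lanes of the concatenated inputs and val[1] the odd-indexed ones, i.e. a de-interleave. A small hypothetical sketch (not part of the patch):

    #include <arm_neon.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Interleaved pairs (x0,y0,x1,y1,...) as a zip would produce. */
      int8x8_t a = {10, 20, 11, 21, 12, 22, 13, 23};
      int8x8_t b = {14, 24, 15, 25, 16, 26, 17, 27};
      int8x8x2_t r = vuzp_s8(a, b);
      int8_t xs[8], ys[8];
      vst1_s8(xs, r.val[0]);  /* 10 11 12 13 14 15 16 17 (even lanes) */
      vst1_s8(ys, r.val[1]);  /* 20 21 22 23 24 25 26 27 (odd lanes)  */
      for (int i = 0; i < 8; i++)
        printf("%d %d\n", xs[i], ys[i]);
      return 0;
    }
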
-// CHECK: @test_vuzp_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META60]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META60]]
+// CHECK-NEXT: ret void
+//
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
return vuzp_s16(a, b);
}
-// CHECK: @test_vuzp_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META63]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META63]]
+// CHECK-NEXT: ret void
+//
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
return vuzp_s32(a, b);
}
-// CHECK: @test_vuzp_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META66]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META66]]
+// CHECK-NEXT: ret void
+//
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
return vuzp_u8(a, b);
}
-// CHECK: @test_vuzp_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META69]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META69]]
+// CHECK-NEXT: ret void
+//
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
return vuzp_u16(a, b);
}
-// CHECK: @test_vuzp_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META72]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META72]]
+// CHECK-NEXT: ret void
+//
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
return vuzp_u32(a, b);
}
-// CHECK: @test_vuzp_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META75]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x float> [[VUZP1_I]], ptr [[TMP6]], align 4, !alias.scope [[META75]]
+// CHECK-NEXT: ret void
+//
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
return vuzp_f32(a, b);
}
-// CHECK: @test_vuzp_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META78]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META78]]
+// CHECK-NEXT: ret void
+//
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
return vuzp_p8(a, b);
}
-// CHECK: @test_vuzp_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzp_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META81:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META81]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META81]]
+// CHECK-NEXT: ret void
+//
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
return vuzp_p16(a, b);
}
-// CHECK: @test_vuzpq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META84:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META84]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META84]]
+// CHECK-NEXT: ret void
+//
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
return vuzpq_s8(a, b);
}
-// CHECK: @test_vuzpq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META87]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META87]]
+// CHECK-NEXT: ret void
+//
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
return vuzpq_s16(a, b);
}
-// CHECK: @test_vuzpq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META90]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META90]]
+// CHECK-NEXT: ret void
+//
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
return vuzpq_s32(a, b);
}
-// CHECK: @test_vuzpq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META93]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META93]]
+// CHECK-NEXT: ret void
+//
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
return vuzpq_u8(a, b);
}
-// CHECK: @test_vuzpq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META96]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META96]]
+// CHECK-NEXT: ret void
+//
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
return vuzpq_u16(a, b);
}
-// CHECK: @test_vuzpq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META99]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META99]]
+// CHECK-NEXT: ret void
+//
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
return vuzpq_u32(a, b);
}
-// CHECK: @test_vuzpq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-// CHECK: store <4 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-// CHECK: store <4 x float> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT: store <4 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META102]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT: store <4 x float> [[VUZP1_I]], ptr [[TMP6]], align 4, !alias.scope [[META102]]
+// CHECK-NEXT: ret void
+//
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
return vuzpq_f32(a, b);
}
-// CHECK: @test_vuzpq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]])
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+// CHECK-NEXT: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META105]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VUZP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META105]]
+// CHECK-NEXT: ret void
+//
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
return vuzpq_p8(a, b);
}
-// CHECK: @test_vuzpq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vuzpq_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META108]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META108]]
+// CHECK-NEXT: ret void
+//
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
return vuzpq_p16(a, b);
}
-// CHECK: @test_vzip_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META111]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META111]]
+// CHECK-NEXT: ret void
+//
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
return vzip_s8(a, b);
}
-// CHECK: @test_vzip_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META114:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META114]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META114]]
+// CHECK-NEXT: ret void
+//
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
return vzip_s16(a, b);
}
-// CHECK: @test_vzip_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META117]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META117]]
+// CHECK-NEXT: ret void
+//
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
return vzip_s32(a, b);
}
-// CHECK: @test_vzip_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META120]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META120]]
+// CHECK-NEXT: ret void
+//
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
return vzip_u8(a, b);
}
-// CHECK: @test_vzip_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META123]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META123]]
+// CHECK-NEXT: ret void
+//
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
return vzip_u16(a, b);
}
-// CHECK: @test_vzip_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META126]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META126]]
+// CHECK-NEXT: ret void
+//
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
return vzip_u32(a, b);
}
-// CHECK: @test_vzip_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
-// CHECK: store <2 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
-// CHECK: store <2 x float> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X2X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META129:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: store <2 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META129]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: store <2 x float> [[VZIP1_I]], ptr [[TMP6]], align 4, !alias.scope [[META129]]
+// CHECK-NEXT: ret void
+//
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
return vzip_f32(a, b);
}
-// CHECK: @test_vzip_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X8X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META132:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META132]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META132]]
+// CHECK-NEXT: ret void
+//
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
return vzip_p8(a, b);
}
-// CHECK: @test_vzip_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzip_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META135:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META135]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META135]]
+// CHECK-NEXT: ret void
+//
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
return vzip_p16(a, b);
}
-// CHECK: @test_vzipq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_s8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META138]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META138]]
+// CHECK-NEXT: ret void
+//
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
return vzipq_s8(a, b);
}
-// CHECK: @test_vzipq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_s16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META141]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META141]]
+// CHECK-NEXT: ret void
+//
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
return vzipq_s16(a, b);
}
-// CHECK: @test_vzipq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_s32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_INT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META144:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META144]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META144]]
+// CHECK-NEXT: ret void
+//
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
return vzipq_s32(a, b);
}
-// CHECK: @test_vzipq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_u8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META147:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META147]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META147]]
+// CHECK-NEXT: ret void
+//
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
return vzipq_u8(a, b);
}
-// CHECK: @test_vzipq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_u16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META150]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META150]]
+// CHECK-NEXT: ret void
+//
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
return vzipq_u16(a, b);
}
-// CHECK: @test_vzipq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_u32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_UINT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META153:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META153]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META153]]
+// CHECK-NEXT: ret void
+//
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
return vzipq_u32(a, b);
}
-// CHECK: @test_vzipq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-// CHECK: store <4 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-// CHECK: store <4 x float> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_f32(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT32X4X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META156:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT: store <4 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META156]]
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT: store <4 x float> [[VZIP1_I]], ptr [[TMP6]], align 4, !alias.scope [[META156]]
+// CHECK-NEXT: ret void
+//
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
return vzipq_f32(a, b);
}
-// CHECK: @test_vzipq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_p8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY8X16X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META159:![0-9]+]])
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+// CHECK-NEXT: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META159]]
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+// CHECK-NEXT: store <16 x i8> [[VZIP1_I]], ptr [[TMP0]], align 4, !alias.scope [[META159]]
+// CHECK-NEXT: ret void
+//
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
return vzipq_p8(a, b);
}
-// CHECK: @test_vzipq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
-// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
-// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
-// CHECK: ret void
+// CHECK-LABEL: define void @test_vzipq_p16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_POLY16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META162:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META162]]
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
+// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope [[META162]]
+// CHECK-NEXT: ret void
+//
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
return vzipq_p16(a, b);
}
+//.
+// CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+// CHECK: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vtrn_s8: %agg.result"}
+// CHECK: [[META5]] = distinct !{[[META5]], !"vtrn_s8"}
+// CHECK: [[META6]] = !{[[META7:![0-9]+]]}
+// CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vtrn_s16: %agg.result"}
+// CHECK: [[META8]] = distinct !{[[META8]], !"vtrn_s16"}
+// CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+// CHECK: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vtrn_s32: %agg.result"}
+// CHECK: [[META11]] = distinct !{[[META11]], !"vtrn_s32"}
+// CHECK: [[META12]] = !{[[META13:![0-9]+]]}
+// CHECK: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vtrn_u8: %agg.result"}
+// CHECK: [[META14]] = distinct !{[[META14]], !"vtrn_u8"}
+// CHECK: [[META15]] = !{[[META16:![0-9]+]]}
+// CHECK: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_u16: %agg.result"}
+// CHECK: [[META17]] = distinct !{[[META17]], !"vtrn_u16"}
+// CHECK: [[META18]] = !{[[META19:![0-9]+]]}
+// CHECK: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrn_u32: %agg.result"}
+// CHECK: [[META20]] = distinct !{[[META20]], !"vtrn_u32"}
+// CHECK: [[META21]] = !{[[META22:![0-9]+]]}
+// CHECK: [[META22]] = distinct !{[[META22]], [[META23:![0-9]+]], !"vtrn_f32: %agg.result"}
+// CHECK: [[META23]] = distinct !{[[META23]], !"vtrn_f32"}
+// CHECK: [[META24]] = !{[[META25:![0-9]+]]}
+// CHECK: [[META25]] = distinct !{[[META25]], [[META26:![0-9]+]], !"vtrn_p8: %agg.result"}
+// CHECK: [[META26]] = distinct !{[[META26]], !"vtrn_p8"}
+// CHECK: [[META27]] = !{[[META28:![0-9]+]]}
+// CHECK: [[META28]] = distinct !{[[META28]], [[META29:![0-9]+]], !"vtrn_p16: %agg.result"}
+// CHECK: [[META29]] = distinct !{[[META29]], !"vtrn_p16"}
+// CHECK: [[META30]] = !{[[META31:![0-9]+]]}
+// CHECK: [[META31]] = distinct !{[[META31]], [[META32:![0-9]+]], !"vtrnq_s8: %agg.result"}
+// CHECK: [[META32]] = distinct !{[[META32]], !"vtrnq_s8"}
+// CHECK: [[META33]] = !{[[META34:![0-9]+]]}
+// CHECK: [[META34]] = distinct !{[[META34]], [[META35:![0-9]+]], !"vtrnq_s16: %agg.result"}
+// CHECK: [[META35]] = distinct !{[[META35]], !"vtrnq_s16"}
+// CHECK: [[META36]] = !{[[META37:![0-9]+]]}
+// CHECK: [[META37]] = distinct !{[[META37]], [[META38:![0-9]+]], !"vtrnq_s32: %agg.result"}
+// CHECK: [[META38]] = distinct !{[[META38]], !"vtrnq_s32"}
+// CHECK: [[META39]] = !{[[META40:![0-9]+]]}
+// CHECK: [[META40]] = distinct !{[[META40]], [[META41:![0-9]+]], !"vtrnq_u8: %agg.result"}
+// CHECK: [[META41]] = distinct !{[[META41]], !"vtrnq_u8"}
+// CHECK: [[META42]] = !{[[META43:![0-9]+]]}
+// CHECK: [[META43]] = distinct !{[[META43]], [[META44:![0-9]+]], !"vtrnq_u16: %agg.result"}
+// CHECK: [[META44]] = distinct !{[[META44]], !"vtrnq_u16"}
+// CHECK: [[META45]] = !{[[META46:![0-9]+]]}
+// CHECK: [[META46]] = distinct !{[[META46]], [[META47:![0-9]+]], !"vtrnq_u32: %agg.result"}
+// CHECK: [[META47]] = distinct !{[[META47]], !"vtrnq_u32"}
+// CHECK: [[META48]] = !{[[META49:![0-9]+]]}
+// CHECK: [[META49]] = distinct !{[[META49]], [[META50:![0-9]+]], !"vtrnq_f32: %agg.result"}
+// CHECK: [[META50]] = distinct !{[[META50]], !"vtrnq_f32"}
+// CHECK: [[META51]] = !{[[META52:![0-9]+]]}
+// CHECK: [[META52]] = distinct !{[[META52]], [[META53:![0-9]+]], !"vtrnq_p8: %agg.result"}
+// CHECK: [[META53]] = distinct !{[[META53]], !"vtrnq_p8"}
+// CHECK: [[META54]] = !{[[META55:![0-9]+]]}
+// CHECK: [[META55]] = distinct !{[[META55]], [[META56:![0-9]+]], !"vtrnq_p16: %agg.result"}
+// CHECK: [[META56]] = distinct !{[[META56]], !"vtrnq_p16"}
+// CHECK: [[META57]] = !{[[META58:![0-9]+]]}
+// CHECK: [[META58]] = distinct !{[[META58]], [[META59:![0-9]+]], !"vuzp_s8: %agg.result"}
+// CHECK: [[META59]] = distinct !{[[META59]], !"vuzp_s8"}
+// CHECK: [[META60]] = !{[[META61:![0-9]+]]}
+// CHECK: [[META61]] = distinct !{[[META61]], [[META62:![0-9]+]], !"vuzp_s16: %agg.result"}
+// CHECK: [[META62]] = distinct !{[[META62]], !"vuzp_s16"}
+// CHECK: [[META63]] = !{[[META64:![0-9]+]]}
+// CHECK: [[META64]] = distinct !{[[META64]], [[META65:![0-9]+]], !"vuzp_s32: %agg.result"}
+// CHECK: [[META65]] = distinct !{[[META65]], !"vuzp_s32"}
+// CHECK: [[META66]] = !{[[META67:![0-9]+]]}
+// CHECK: [[META67]] = distinct !{[[META67]], [[META68:![0-9]+]], !"vuzp_u8: %agg.result"}
+// CHECK: [[META68]] = distinct !{[[META68]], !"vuzp_u8"}
+// CHECK: [[META69]] = !{[[META70:![0-9]+]]}
+// CHECK: [[META70]] = distinct !{[[META70]], [[META71:![0-9]+]], !"vuzp_u16: %agg.result"}
+// CHECK: [[META71]] = distinct !{[[META71]], !"vuzp_u16"}
+// CHECK: [[META72]] = !{[[META73:![0-9]+]]}
+// CHECK: [[META73]] = distinct !{[[META73]], [[META74:![0-9]+]], !"vuzp_u32: %agg.result"}
+// CHECK: [[META74]] = distinct !{[[META74]], !"vuzp_u32"}
+// CHECK: [[META75]] = !{[[META76:![0-9]+]]}
+// CHECK: [[META76]] = distinct !{[[META76]], [[META77:![0-9]+]], !"vuzp_f32: %agg.result"}
+// CHECK: [[META77]] = distinct !{[[META77]], !"vuzp_f32"}
+// CHECK: [[META78]] = !{[[META79:![0-9]+]]}
+// CHECK: [[META79]] = distinct !{[[META79]], [[META80:![0-9]+]], !"vuzp_p8: %agg.result"}
+// CHECK: [[META80]] = distinct !{[[META80]], !"vuzp_p8"}
+// CHECK: [[META81]] = !{[[META82:![0-9]+]]}
+// CHECK: [[META82]] = distinct !{[[META82]], [[META83:![0-9]+]], !"vuzp_p16: %agg.result"}
+// CHECK: [[META83]] = distinct !{[[META83]], !"vuzp_p16"}
+// CHECK: [[META84]] = !{[[META85:![0-9]+]]}
+// CHECK: [[META85]] = distinct !{[[META85]], [[META86:![0-9]+]], !"vuzpq_s8: %agg.result"}
+// CHECK: [[META86]] = distinct !{[[META86]], !"vuzpq_s8"}
+// CHECK: [[META87]] = !{[[META88:![0-9]+]]}
+// CHECK: [[META88]] = distinct !{[[META88]], [[META89:![0-9]+]], !"vuzpq_s16: %agg.result"}
+// CHECK: [[META89]] = distinct !{[[META89]], !"vuzpq_s16"}
+// CHECK: [[META90]] = !{[[META91:![0-9]+]]}
+// CHECK: [[META91]] = distinct !{[[META91]], [[META92:![0-9]+]], !"vuzpq_s32: %agg.result"}
+// CHECK: [[META92]] = distinct !{[[META92]], !"vuzpq_s32"}
+// CHECK: [[META93]] = !{[[META94:![0-9]+]]}
+// CHECK: [[META94]] = distinct !{[[META94]], [[META95:![0-9]+]], !"vuzpq_u8: %agg.result"}
+// CHECK: [[META95]] = distinct !{[[META95]], !"vuzpq_u8"}
+// CHECK: [[META96]] = !{[[META97:![0-9]+]]}
+// CHECK: [[META97]] = distinct !{[[META97]], [[META98:![0-9]+]], !"vuzpq_u16: %agg.result"}
+// CHECK: [[META98]] = distinct !{[[META98]], !"vuzpq_u16"}
+// CHECK: [[META99]] = !{[[META100:![0-9]+]]}
+// CHECK: [[META100]] = distinct !{[[META100]], [[META101:![0-9]+]], !"vuzpq_u32: %agg.result"}
+// CHECK: [[META101]] = distinct !{[[META101]], !"vuzpq_u32"}
+// CHECK: [[META102]] = !{[[META103:![0-9]+]]}
+// CHECK: [[META103]] = distinct !{[[META103]], [[META104:![0-9]+]], !"vuzpq_f32: %agg.result"}
+// CHECK: [[META104]] = distinct !{[[META104]], !"vuzpq_f32"}
+// CHECK: [[META105]] = !{[[META106:![0-9]+]]}
+// CHECK: [[META106]] = distinct !{[[META106]], [[META107:![0-9]+]], !"vuzpq_p8: %agg.result"}
+// CHECK: [[META107]] = distinct !{[[META107]], !"vuzpq_p8"}
+// CHECK: [[META108]] = !{[[META109:![0-9]+]]}
+// CHECK: [[META109]] = distinct !{[[META109]], [[META110:![0-9]+]], !"vuzpq_p16: %agg.result"}
+// CHECK: [[META110]] = distinct !{[[META110]], !"vuzpq_p16"}
+// CHECK: [[META111]] = !{[[META112:![0-9]+]]}
+// CHECK: [[META112]] = distinct !{[[META112]], [[META113:![0-9]+]], !"vzip_s8: %agg.result"}
+// CHECK: [[META113]] = distinct !{[[META113]], !"vzip_s8"}
+// CHECK: [[META114]] = !{[[META115:![0-9]+]]}
+// CHECK: [[META115]] = distinct !{[[META115]], [[META116:![0-9]+]], !"vzip_s16: %agg.result"}
+// CHECK: [[META116]] = distinct !{[[META116]], !"vzip_s16"}
+// CHECK: [[META117]] = !{[[META118:![0-9]+]]}
+// CHECK: [[META118]] = distinct !{[[META118]], [[META119:![0-9]+]], !"vzip_s32: %agg.result"}
+// CHECK: [[META119]] = distinct !{[[META119]], !"vzip_s32"}
+// CHECK: [[META120]] = !{[[META121:![0-9]+]]}
+// CHECK: [[META121]] = distinct !{[[META121]], [[META122:![0-9]+]], !"vzip_u8: %agg.result"}
+// CHECK: [[META122]] = distinct !{[[META122]], !"vzip_u8"}
+// CHECK: [[META123]] = !{[[META124:![0-9]+]]}
+// CHECK: [[META124]] = distinct !{[[META124]], [[META125:![0-9]+]], !"vzip_u16: %agg.result"}
+// CHECK: [[META125]] = distinct !{[[META125]], !"vzip_u16"}
+// CHECK: [[META126]] = !{[[META127:![0-9]+]]}
+// CHECK: [[META127]] = distinct !{[[META127]], [[META128:![0-9]+]], !"vzip_u32: %agg.result"}
+// CHECK: [[META128]] = distinct !{[[META128]], !"vzip_u32"}
+// CHECK: [[META129]] = !{[[META130:![0-9]+]]}
+// CHECK: [[META130]] = distinct !{[[META130]], [[META131:![0-9]+]], !"vzip_f32: %agg.result"}
+// CHECK: [[META131]] = distinct !{[[META131]], !"vzip_f32"}
+// CHECK: [[META132]] = !{[[META133:![0-9]+]]}
+// CHECK: [[META133]] = distinct !{[[META133]], [[META134:![0-9]+]], !"vzip_p8: %agg.result"}
+// CHECK: [[META134]] = distinct !{[[META134]], !"vzip_p8"}
+// CHECK: [[META135]] = !{[[META136:![0-9]+]]}
+// CHECK: [[META136]] = distinct !{[[META136]], [[META137:![0-9]+]], !"vzip_p16: %agg.result"}
+// CHECK: [[META137]] = distinct !{[[META137]], !"vzip_p16"}
+// CHECK: [[META138]] = !{[[META139:![0-9]+]]}
+// CHECK: [[META139]] = distinct !{[[META139]], [[META140:![0-9]+]], !"vzipq_s8: %agg.result"}
+// CHECK: [[META140]] = distinct !{[[META140]], !"vzipq_s8"}
+// CHECK: [[META141]] = !{[[META142:![0-9]+]]}
+// CHECK: [[META142]] = distinct !{[[META142]], [[META143:![0-9]+]], !"vzipq_s16: %agg.result"}
+// CHECK: [[META143]] = distinct !{[[META143]], !"vzipq_s16"}
+// CHECK: [[META144]] = !{[[META145:![0-9]+]]}
+// CHECK: [[META145]] = distinct !{[[META145]], [[META146:![0-9]+]], !"vzipq_s32: %agg.result"}
+// CHECK: [[META146]] = distinct !{[[META146]], !"vzipq_s32"}
+// CHECK: [[META147]] = !{[[META148:![0-9]+]]}
+// CHECK: [[META148]] = distinct !{[[META148]], [[META149:![0-9]+]], !"vzipq_u8: %agg.result"}
+// CHECK: [[META149]] = distinct !{[[META149]], !"vzipq_u8"}
+// CHECK: [[META150]] = !{[[META151:![0-9]+]]}
+// CHECK: [[META151]] = distinct !{[[META151]], [[META152:![0-9]+]], !"vzipq_u16: %agg.result"}
+// CHECK: [[META152]] = distinct !{[[META152]], !"vzipq_u16"}
+// CHECK: [[META153]] = !{[[META154:![0-9]+]]}
+// CHECK: [[META154]] = distinct !{[[META154]], [[META155:![0-9]+]], !"vzipq_u32: %agg.result"}
+// CHECK: [[META155]] = distinct !{[[META155]], !"vzipq_u32"}
+// CHECK: [[META156]] = !{[[META157:![0-9]+]]}
+// CHECK: [[META157]] = distinct !{[[META157]], [[META158:![0-9]+]], !"vzipq_f32: %agg.result"}
+// CHECK: [[META158]] = distinct !{[[META158]], !"vzipq_f32"}
+// CHECK: [[META159]] = !{[[META160:![0-9]+]]}
+// CHECK: [[META160]] = distinct !{[[META160]], [[META161:![0-9]+]], !"vzipq_p8: %agg.result"}
+// CHECK: [[META161]] = distinct !{[[META161]], !"vzipq_p8"}
+// CHECK: [[META162]] = !{[[META163:![0-9]+]]}
+// CHECK: [[META163]] = distinct !{[[META163]], [[META164:![0-9]+]], !"vzipq_p16: %agg.result"}
+// CHECK: [[META164]] = distinct !{[[META164]], !"vzipq_p16"}
+//.
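
For context on the CHECK metadata above: each vuzp/vzip variant returns a two-vector struct, so clang lowers it through an %agg.result sret pointer, and the paired alias-scope entries tie every store back to the wrapper that produced it. A minimal sketch of a call that exercises one of these wrappers (illustrative only, not part of the patch):

    #include <arm_neon.h>

    /* vuzp_s16 de-interleaves two int16x4_t vectors; the int16x4x2_t
       result is returned indirectly via sret, which is what the
       "%agg.result" scopes in the metadata above refer to. */
    int16x4x2_t demo_vuzp(int16x4_t a, int16x4_t b) {
      return vuzp_s16(a, b);
    }
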
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 51eb02be58692..51bfe212464cf 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1367,7 +1367,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {
LocalCK = ClassB;
if (!getReturnType().isVoid() && !SRet)
- S += "(" + RetVar.getType().str() + ") ";
+ S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
@@ -1387,11 +1387,12 @@ void Intrinsic::emitBodyAsBuiltinCall() {
Type T2 = T;
T2.makeOneVector();
T2.makeInteger(8, /*Sign=*/true);
- Cast = "(" + T2.str() + ")";
+ Cast = "__builtin_bit_cast(" + T2.str() + ", ";
}
for (unsigned J = 0; J < T.getNumVectors(); ++J)
- S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
+ S += Cast + V.getName() + ".val[" + utostr(J) + "]" +
+ (Cast.empty() ? ", " : "), ");
continue;
}
@@ -1399,14 +1400,16 @@ void Intrinsic::emitBodyAsBuiltinCall() {
Type CastToType = T;
// Check if an explicit cast is needed.
- if (CastToType.isVector() &&
- (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
- CastToType.makeInteger(8, true);
- Arg = "(" + CastToType.str() + ")" + Arg;
- } else if (CastToType.isVector() && LocalCK == ClassI) {
- if (CastToType.isInteger())
- CastToType.makeSigned();
- Arg = "(" + CastToType.str() + ")" + Arg;
+ if (CastToType.isVector()) {
+ if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
+ CastToType.makeInteger(8, true);
+ Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ } else if (LocalCK == ClassI) {
+ if (CastToType.isInteger()) {
+ CastToType.makeSigned();
+ Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+ }
+ }
}
S += Arg + ", ";
@@ -1420,6 +1423,9 @@ void Intrinsic::emitBodyAsBuiltinCall() {
S.pop_back();
S.pop_back();
}
+
+ if (!getReturnType().isVoid() && !SRet)
+ S += ")";
S += ");";
std::string RetExpr;
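
To make the NeonEmitter change concrete: the emitter used to wrap the __builtin_neon_* call and its vector operands in C-style casts, which silently reinterpret vector bits and are easy to confuse with value conversions; it now emits __builtin_bit_cast, which makes the reinterpretation explicit. A hedged sketch of the shape of the generated arm_neon.h body (the exact wrapper text comes from TableGen, and __builtin_neon_vxyz_v is a placeholder name):

    /* Before (C-style casts):
         return (uint8x8_t)__builtin_neon_vxyz_v((int8x8_t)a, (int8x8_t)b, 16);
       After (explicit bitcasts; the trailing ")" appended at the end of
       emitBodyAsBuiltinCall closes the outer bit_cast):
         return __builtin_bit_cast(uint8x8_t,
             __builtin_neon_vxyz_v(__builtin_bit_cast(int8x8_t, a),
                                   __builtin_bit_cast(int8x8_t, b), 16)); */
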
diff --git a/llvm/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.ll b/llvm/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.ll
new file mode 100644
index 0000000000000..58f1accdf91d5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+target triple = "arm64-none-linux-gnu"
+
+define <4 x half> @test_vsqrt_f16(<4 x half> %a) #0 {
+; CHECK-LABEL: test_vsqrt_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fsqrt v0.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %vsqrt.i = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %vsqrt.i
+}
+
+define <8 x half> @test_vsqrtq_f16(<8 x half> %a) #0 {
+; CHECK-LABEL: test_vsqrtq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fsqrt v0.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %vsqrt.i = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %vsqrt.i
+}
+
+define <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfma_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmaq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfms_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls v0.4h, v2.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+ %fneg.i = fneg <4 x half> %b
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %fneg.i, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmsq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+ %fneg.i = fneg <8 x half> %b
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %fneg.i, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define <4 x half> @test_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfma_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x half> %c, <4 x half> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %fmla2 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %lane, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %fmla2
+}
+
+define <8 x half> @test_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfmaq_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x half> %c, <4 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %fmla2 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %lane, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %fmla2
+}
+
+define <4 x half> @test_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfma_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <8 x half> %c, <8 x half> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %lane, <4 x half> %b, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmaq_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %lane, <8 x half> %b, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define <4 x half> @test_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) #0 {
+; CHECK-LABEL: test_vfma_n_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
+entry:
+ %vecinit = insertelement <4 x half> poison, half %c, i64 0
+ %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> poison, <4 x i32> zeroinitializer
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) #0 {
+; CHECK-LABEL: test_vfmaq_n_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
+; CHECK-NEXT: ret
+entry:
+ %vecinit = insertelement <8 x half> poison, half %c, i64 0
+ %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> poison, <8 x i32> zeroinitializer
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define half @test_vfmah_lane_f16(half %a, half %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfmah_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmla h0, h1, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %extract = extractelement <4 x half> %c, i64 3
+ %0 = call half @llvm.experimental.constrained.fma.f16(half %b, half %extract, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret half %0
+}
+
+define half @test_vfmah_laneq_f16(half %a, half %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmah_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmla h0, h1, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %extract = extractelement <8 x half> %c, i64 7
+ %0 = call half @llvm.experimental.constrained.fma.f16(half %b, half %extract, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret half %0
+}
+
+define <4 x half> @test_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfms_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <4 x half> %b
+ %lane = shufflevector <4 x half> %c, <4 x half> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %fmla2 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %fneg, <4 x half> %lane, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %fmla2
+}
+
+define <8 x half> @test_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfmsq_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <8 x half> %b
+ %lane = shufflevector <4 x half> %c, <4 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %fmla2 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %fneg, <8 x half> %lane, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %fmla2
+}
+
+define <4 x half> @test_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfms_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <4 x half> %b
+ %lane = shufflevector <8 x half> %c, <8 x half> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %lane, <4 x half> %fneg, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmsq_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <8 x half> %b
+ %lane = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %lane, <8 x half> %fneg, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define <4 x half> @test_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) #0 {
+; CHECK-LABEL: test_vfms_n_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <4 x half> %b
+ %vecinit = insertelement <4 x half> poison, half %c, i64 0
+ %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> poison, <4 x i32> zeroinitializer
+ %0 = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %fneg, <4 x half> %vecinit3, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <4 x half> %0
+}
+
+define <8 x half> @test_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) #0 {
+; CHECK-LABEL: test_vfmsq_n_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
+; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
+; CHECK-NEXT: ret
+entry:
+ %fneg = fneg <8 x half> %b
+ %vecinit = insertelement <8 x half> poison, half %c, i64 0
+ %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> poison, <8 x i32> zeroinitializer
+ %0 = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %fneg, <8 x half> %vecinit7, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret <8 x half> %0
+}
+
+define half @test_vfmsh_lane_f16(half %a, half %b, <4 x half> %c) #0 {
+; CHECK-LABEL: test_vfmsh_lane_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fneg s1, s1
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: fmla h0, h1, v2.h[3]
+; CHECK-NEXT: ret
+entry:
+ %conv = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict") #1
+ %fneg = fneg float %conv
+ %0 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %fneg, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %extract = extractelement <4 x half> %c, i64 3
+ %1 = call half @llvm.experimental.constrained.fma.f16(half %0, half %extract, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret half %1
+}
+
+define half @test_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c) #0 {
+; CHECK-LABEL: test_vfmsh_laneq_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fneg s1, s1
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: fmla h0, h1, v2.h[7]
+; CHECK-NEXT: ret
+entry:
+ %conv = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict") #1
+ %fneg = fneg float %conv
+ %0 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %fneg, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ %extract = extractelement <8 x half> %c, i64 7
+ %1 = call half @llvm.experimental.constrained.fma.f16(half %0, half %extract, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1
+ ret half %1
+}
+
+attributes #0 = { noinline nounwind strictfp "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
+attributes #1 = { strictfp }
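
For reference, the constrained-intrinsic IR in this test is the strict-FP lowering of the corresponding arm_neon.h calls. A minimal sketch of source that would produce the test_vfma_f16 pattern (the flags and file name here are illustrative, not taken from the patch):

    /* clang --target=aarch64-none-linux-gnu -march=armv8.2-a+fp16 \
             -ffp-exception-behavior=strict -S -emit-llvm demo.c */
    #include <arm_neon.h>

    /* Under strict FP semantics this lowers to a call to
       llvm.experimental.constrained.fma.v4f16, as checked above. */
    float16x4_t demo_vfma(float16x4_t acc, float16x4_t x, float16x4_t y) {
      return vfma_f16(acc, x, y);
    }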